diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch | 7283 |
1 files changed, 7283 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch new file mode 100644 index 00000000..992d3920 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch @@ -0,0 +1,7283 @@ +From e1e58ece64ca912cda59f58f4b0a4037d5cdbe08 Mon Sep 17 00:00:00 2001 +From: Jun Lei <Jun.Lei@amd.com> +Date: Mon, 8 Jul 2019 15:15:42 -0400 +Subject: [PATCH 3090/4256] drm/amd/display: support "dummy pstate" + +[why] +Existing support in DC for pstate only accounts for a single latency. This is sufficient when the +variance of latency is small, or when pstate support isn't necessary for correct ASIC functionality. + +Newer ASICs violate both existing assumptions. PState support is mandatory for correct ASIC +functionality, but not all latencies have to be supported. Existing code supports a "full p state" which +allows memory clock to change, but is hard for DCN to support (as it requires very large buffers). +New code will now fall back to a "dummy p state" support when "full p state" cannot be supported. +This easy p state support should always be allowed. + +[how] +Define a new latency in socBB. Add fallback logic to support it. Note DML is also updated to ensure +that fallback will always work. 
+ +Change-Id: I068e34cae8fa2f7cd31b530a68822c479525048c +Signed-off-by: Jun Lei <Jun.Lei@amd.com> +Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com> +Acked-by: Leo Li <sunpeng.li@amd.com> +--- + .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 2 + + drivers/gpu/drm/amd/display/dc/dc.h | 7 + + .../drm/amd/display/dc/dcn20/dcn20_hubbub.c | 11 + + .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 10 +- + .../drm/amd/display/dc/dcn20/dcn20_resource.c | 59 +- + drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 + + .../dc/dml/dcn20/display_mode_vba_20v2.c | 5109 +++++++++++++++++ + .../dc/dml/dcn20/display_mode_vba_20v2.h | 32 + + .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 1701 ++++++ + .../dc/dml/dcn20/display_rq_dlg_calc_20v2.h | 74 + + .../drm/amd/display/dc/dml/display_mode_lib.c | 12 + + .../drm/amd/display/dc/dml/display_mode_lib.h | 1 + + .../amd/display/dc/dml/display_mode_structs.h | 1 + + .../drm/amd/display/dc/dml/display_mode_vba.c | 8 +- + 14 files changed, 7022 insertions(+), 8 deletions(-) + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +index 3cff4f0518d3..7ff0396956b3 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +@@ -201,6 +201,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, + } + + if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { ++ clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; + 
clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; + if (pp_smu && pp_smu->set_pstate_handshake_support) + pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support); +@@ -308,6 +309,7 @@ void dcn2_init_clocks(struct clk_mgr *clk_mgr) + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + // Assumption is that boot state always supports pstate + clk_mgr->clks.p_state_change_support = true; ++ clk_mgr->clks.prev_p_state_change_support = true; + } + + void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) +diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h +index 24320d04f5d1..8d890468908f 100644 +--- a/drivers/gpu/drm/amd/display/dc/dc.h ++++ b/drivers/gpu/drm/amd/display/dc/dc.h +@@ -124,6 +124,7 @@ struct dc_caps { + struct dc_bug_wa { + bool no_connect_phy_config; + bool dedcn20_305_wa; ++ struct display_mode_lib alternate_dml; + }; + #endif + +@@ -266,6 +267,12 @@ struct dc_clocks { + int phyclk_khz; + int dramclk_khz; + bool p_state_change_support; ++ ++ /* ++ * Elements below are not compared for the purposes of ++ * optimization required ++ */ ++ bool prev_p_state_change_support; + }; + + struct dc_bw_validation_profile { +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +index 6e2dbd03f9bf..31d6e79ba2b8 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +@@ -26,6 +26,7 @@ + + #include "dcn20_hubbub.h" + #include "reg_helper.h" ++#include "clk_mgr.h" + + #define REG(reg)\ + hubbub1->regs->reg +@@ -553,6 +554,16 @@ static void hubbub2_program_watermarks( + */ + hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); ++ ++ /* ++ * There's a special case when going from p-state support to p-state unsupported 
++ * here we are going to LOWER watermarks to go to dummy p-state only, but this has ++ * to be done prepare_bandwidth, not optimize ++ */ ++ if (hubbub1->base.ctx->dc->clk_mgr->clks.prev_p_state_change_support == true && ++ hubbub1->base.ctx->dc->clk_mgr->clks.p_state_change_support == false) ++ safe_to_lower = true; ++ + hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + + REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0, +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +index e60be115691b..08a96faef775 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +@@ -1445,16 +1445,16 @@ void dcn20_prepare_bandwidth( + { + struct hubbub *hubbub = dc->res_pool->hubbub; + ++ dc->clk_mgr->funcs->update_clocks( ++ dc->clk_mgr, ++ context, ++ false); ++ + /* program dchubbub watermarks */ + hubbub->funcs->program_watermarks(hubbub, + &context->bw_ctx.bw.dcn.watermarks, + dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, + false); +- +- dc->clk_mgr->funcs->update_clocks( +- dc->clk_mgr, +- context, +- false); + } + + void dcn20_optimize_bandwidth( +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +index 193270ba60e6..2cf788a3704e 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +@@ -2425,7 +2425,7 @@ void dcn20_calculate_dlg_params( + } + } + +-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, ++static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context, + bool fast_validate) + { + bool out = false; +@@ -2477,6 +2477,62 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, + return out; + } + ++ ++bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, ++ bool fast_validate) ++{ ++ 
bool voltage_supported = false; ++ bool full_pstate_supported = false; ++ bool dummy_pstate_supported = false; ++ double p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; ++ ++ if (fast_validate) ++ return dcn20_validate_bandwidth_internal(dc, context, true); ++ ++ ++ // Best case, we support full UCLK switch latency ++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); ++ full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; ++ ++ if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || ++ (voltage_supported && full_pstate_supported)) { ++ context->bw_ctx.bw.dcn.clk.p_state_change_support = true; ++ goto restore_dml_state; ++ } ++ ++ // Fallback #1: Try to only support G6 temperature read latency ++ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; ++ ++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); ++ dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; ++ ++ if (voltage_supported && dummy_pstate_supported) { ++ context->bw_ctx.bw.dcn.clk.p_state_change_support = false; ++ goto restore_dml_state; ++ } ++ ++ // Fallback #2: Retry with "new" DCN20 to support G6 temperature read latency ++ memcpy (&context->bw_ctx.dml, &dc->work_arounds.alternate_dml, sizeof (struct display_mode_lib)); ++ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; ++ ++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); ++ dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; ++ ++ if (voltage_supported && dummy_pstate_supported) { ++ context->bw_ctx.bw.dcn.clk.p_state_change_support = false; ++ goto restore_dml_state; ++ } ++ ++ // ERROR: fallback #2 is supposed to always work. 
++ ASSERT(false); ++ ++restore_dml_state: ++ memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); ++ context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; ++ ++ return voltage_supported; ++} ++ + struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer( + struct dc_state *state, + const struct resource_pool *pool, +@@ -3073,6 +3129,7 @@ static bool construct( + } + + dml_init_instance(&dc->dml, &dcn2_0_soc, &dcn2_0_ip, DML_PROJECT_NAVI10); ++ dml_init_instance(&dc->work_arounds.alternate_dml, &dcn2_0_soc, &dcn2_0_ip, DML_PROJECT_NAVI10v2); + + if (!dc->debug.disable_pplib_wm_range) { + struct pp_smu_wm_range_sets ranges = {0}; +diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile +index 0bb7a20675c4..1735fc1e2eb1 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile ++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile +@@ -38,6 +38,8 @@ ifdef CONFIG_DRM_AMD_DC_DCN2_0 + CFLAGS_display_mode_vba.o := $(dml_ccflags) + CFLAGS_display_mode_vba_20.o := $(dml_ccflags) + CFLAGS_display_rq_dlg_calc_20.o := $(dml_ccflags) ++CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags) ++CFLAGS_display_rq_dlg_calc_20v2.o := $(dml_ccflags) + endif + ifdef CONFIG_DRM_AMD_DCN3AG + CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags) +@@ -51,6 +53,7 @@ DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ + + ifdef CONFIG_DRM_AMD_DC_DCN2_0 + DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o ++DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o + endif + + AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML)) +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +new file mode 100644 +index 000000000000..22455db54980 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +@@ -0,0 +1,5109 @@ ++/* ++ * Copyright 
2018 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: AMD ++ * ++ */ ++ ++#include "../display_mode_lib.h" ++#include "display_mode_vba_20v2.h" ++#include "../dml_inline_defs.h" ++ ++/* ++ * NOTE: ++ * This file is gcc-parseable HW gospel, coming straight from HW engineers. ++ * ++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd ++ * ways. Unless there is something clearly wrong with it the code should ++ * remain as-is as it provides us with a guarantee from HW that it is correct. 
++ */ ++ ++#define BPP_INVALID 0 ++#define BPP_BLENDED_PIPE 0xffffffff ++ ++static double adjust_ReturnBW( ++ struct display_mode_lib *mode_lib, ++ double ReturnBW, ++ bool DCCEnabledAnyPlane, ++ double ReturnBandwidthToDCN); ++static unsigned int dscceComputeDelay( ++ unsigned int bpc, ++ double bpp, ++ unsigned int sliceWidth, ++ unsigned int numSlices, ++ enum output_format_class pixelFormat); ++static unsigned int dscComputeDelay(enum output_format_class pixelFormat); ++static bool CalculateDelayAfterScaler( ++ struct display_mode_lib *mode_lib, ++ double ReturnBW, ++ double ReadBandwidthPlaneLuma, ++ double ReadBandwidthPlaneChroma, ++ double TotalDataReadBandwidth, ++ double DisplayPipeLineDeliveryTimeLuma, ++ double DisplayPipeLineDeliveryTimeChroma, ++ double DPPCLK, ++ double DISPCLK, ++ double PixelClock, ++ unsigned int DSCDelay, ++ unsigned int DPPPerPlane, ++ bool ScalerEnabled, ++ unsigned int NumberOfCursors, ++ double DPPCLKDelaySubtotal, ++ double DPPCLKDelaySCL, ++ double DPPCLKDelaySCLLBOnly, ++ double DPPCLKDelayCNVCFormater, ++ double DPPCLKDelayCNVCCursor, ++ double DISPCLKDelaySubtotal, ++ unsigned int ScalerRecoutWidth, ++ enum output_format_class OutputFormat, ++ unsigned int HTotal, ++ unsigned int SwathWidthSingleDPPY, ++ double BytePerPixelDETY, ++ double BytePerPixelDETC, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ bool Interlace, ++ bool ProgressiveToInterlaceUnitInOPP, ++ double *DSTXAfterScaler, ++ double *DSTYAfterScaler ++ ); ++// Super monster function with some 45 argument ++static bool CalculatePrefetchSchedule( ++ struct display_mode_lib *mode_lib, ++ double DPPCLK, ++ double DISPCLK, ++ double PixelClock, ++ double DCFCLKDeepSleep, ++ unsigned int DPPPerPlane, ++ unsigned int NumberOfCursors, ++ unsigned int VBlank, ++ unsigned int HTotal, ++ unsigned int MaxInterDCNTileRepeaters, ++ unsigned int VStartup, ++ unsigned int PageTableLevels, ++ bool GPUVMEnable, ++ bool DynamicMetadataEnable, ++ unsigned int 
DynamicMetadataLinesBeforeActiveRequired, ++ unsigned int DynamicMetadataTransmittedBytes, ++ bool DCCEnable, ++ double UrgentLatencyPixelDataOnly, ++ double UrgentExtraLatency, ++ double TCalc, ++ unsigned int PDEAndMetaPTEBytesFrame, ++ unsigned int MetaRowByte, ++ unsigned int PixelPTEBytesPerRow, ++ double PrefetchSourceLinesY, ++ unsigned int SwathWidthY, ++ double BytePerPixelDETY, ++ double VInitPreFillY, ++ unsigned int MaxNumSwathY, ++ double PrefetchSourceLinesC, ++ double BytePerPixelDETC, ++ double VInitPreFillC, ++ unsigned int MaxNumSwathC, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ double TWait, ++ bool XFCEnabled, ++ double XFCRemoteSurfaceFlipDelay, ++ bool InterlaceEnable, ++ bool ProgressiveToInterlaceUnitInOPP, ++ double DSTXAfterScaler, ++ double DSTYAfterScaler, ++ double *DestinationLinesForPrefetch, ++ double *PrefetchBandwidth, ++ double *DestinationLinesToRequestVMInVBlank, ++ double *DestinationLinesToRequestRowInVBlank, ++ double *VRatioPrefetchY, ++ double *VRatioPrefetchC, ++ double *RequiredPrefetchPixDataBW, ++ double *Tno_bw, ++ unsigned int *VUpdateOffsetPix, ++ double *VUpdateWidthPix, ++ double *VReadyOffsetPix); ++static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); ++static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); ++static double CalculatePrefetchSourceLines( ++ struct display_mode_lib *mode_lib, ++ double VRatio, ++ double vtaps, ++ bool Interlace, ++ bool ProgressiveToInterlaceUnitInOPP, ++ unsigned int SwathHeight, ++ unsigned int ViewportYStart, ++ double *VInitPreFill, ++ unsigned int *MaxNumSwath); ++static unsigned int CalculateVMAndRowBytes( ++ struct display_mode_lib *mode_lib, ++ bool DCCEnable, ++ unsigned int BlockHeight256Bytes, ++ unsigned int BlockWidth256Bytes, ++ enum source_format_class SourcePixelFormat, ++ unsigned int SurfaceTiling, ++ unsigned int BytePerPixel, ++ enum scan_direction_class ScanDirection, ++ unsigned int ViewportWidth, ++ unsigned 
int ViewportHeight, ++ unsigned int SwathWidthY, ++ bool GPUVMEnable, ++ unsigned int VMMPageSize, ++ unsigned int PTEBufferSizeInRequestsLuma, ++ unsigned int PDEProcessingBufIn64KBReqs, ++ unsigned int Pitch, ++ unsigned int DCCMetaPitch, ++ unsigned int *MacroTileWidth, ++ unsigned int *MetaRowByte, ++ unsigned int *PixelPTEBytesPerRow, ++ bool *PTEBufferSizeNotExceeded, ++ unsigned int *dpte_row_height, ++ unsigned int *meta_row_height); ++static double CalculateTWait( ++ unsigned int PrefetchMode, ++ double DRAMClockChangeLatency, ++ double UrgentLatencyPixelDataOnly, ++ double SREnterPlusExitTime); ++static double CalculateRemoteSurfaceFlipDelay( ++ struct display_mode_lib *mode_lib, ++ double VRatio, ++ double SwathWidth, ++ double Bpp, ++ double LineTime, ++ double XFCTSlvVupdateOffset, ++ double XFCTSlvVupdateWidth, ++ double XFCTSlvVreadyOffset, ++ double XFCXBUFLatencyTolerance, ++ double XFCFillBWOverhead, ++ double XFCSlvChunkSize, ++ double XFCBusTransportTime, ++ double TCalc, ++ double TWait, ++ double *SrcActiveDrainRate, ++ double *TInitXFill, ++ double *TslvChk); ++static void CalculateActiveRowBandwidth( ++ bool GPUVMEnable, ++ enum source_format_class SourcePixelFormat, ++ double VRatio, ++ bool DCCEnable, ++ double LineTime, ++ unsigned int MetaRowByteLuma, ++ unsigned int MetaRowByteChroma, ++ unsigned int meta_row_height_luma, ++ unsigned int meta_row_height_chroma, ++ unsigned int PixelPTEBytesPerRowLuma, ++ unsigned int PixelPTEBytesPerRowChroma, ++ unsigned int dpte_row_height_luma, ++ unsigned int dpte_row_height_chroma, ++ double *meta_row_bw, ++ double *dpte_row_bw, ++ double *qual_row_bw); ++static void CalculateFlipSchedule( ++ struct display_mode_lib *mode_lib, ++ double UrgentExtraLatency, ++ double UrgentLatencyPixelDataOnly, ++ unsigned int GPUVMMaxPageTableLevels, ++ bool GPUVMEnable, ++ double BandwidthAvailableForImmediateFlip, ++ unsigned int TotImmediateFlipBytes, ++ enum source_format_class SourcePixelFormat, ++ unsigned 
int ImmediateFlipBytes, ++ double LineTime, ++ double VRatio, ++ double Tno_bw, ++ double PDEAndMetaPTEBytesFrame, ++ unsigned int MetaRowByte, ++ unsigned int PixelPTEBytesPerRow, ++ bool DCCEnable, ++ unsigned int dpte_row_height, ++ unsigned int meta_row_height, ++ double qual_row_bw, ++ double *DestinationLinesToRequestVMInImmediateFlip, ++ double *DestinationLinesToRequestRowInImmediateFlip, ++ double *final_flip_bw, ++ bool *ImmediateFlipSupportedForPipe); ++static double CalculateWriteBackDelay( ++ enum source_format_class WritebackPixelFormat, ++ double WritebackHRatio, ++ double WritebackVRatio, ++ unsigned int WritebackLumaHTaps, ++ unsigned int WritebackLumaVTaps, ++ unsigned int WritebackChromaHTaps, ++ unsigned int WritebackChromaVTaps, ++ unsigned int WritebackDestinationWidth); ++ ++static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib); ++static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( ++ struct display_mode_lib *mode_lib); ++ ++void dml20v2_recalculate(struct display_mode_lib *mode_lib) ++{ ++ ModeSupportAndSystemConfiguration(mode_lib); ++ mode_lib->vba.FabricAndDRAMBandwidth = dml_min( ++ mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, ++ mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0; ++ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); ++ dml20v2_DisplayPipeConfiguration(mode_lib); ++ dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); ++} ++ ++static double adjust_ReturnBW( ++ struct display_mode_lib *mode_lib, ++ double ReturnBW, ++ bool DCCEnabledAnyPlane, ++ double ReturnBandwidthToDCN) ++{ ++ double CriticalCompression; ++ ++ if (DCCEnabledAnyPlane ++ && ReturnBandwidthToDCN ++ > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) ++ ReturnBW = ++ dml_min( ++ ReturnBW, ++ ReturnBandwidthToDCN * 4 ++ * (1.0 ++ - 
mode_lib->vba.UrgentLatencyPixelDataOnly ++ / ((mode_lib->vba.ROBBufferSizeInKByte ++ - mode_lib->vba.PixelChunkSizeInKByte) ++ * 1024 ++ / ReturnBandwidthToDCN ++ - mode_lib->vba.DCFCLK ++ * mode_lib->vba.ReturnBusWidth ++ / 4) ++ + mode_lib->vba.UrgentLatencyPixelDataOnly)); ++ ++ CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK ++ * mode_lib->vba.UrgentLatencyPixelDataOnly ++ / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly ++ + (mode_lib->vba.ROBBufferSizeInKByte ++ - mode_lib->vba.PixelChunkSizeInKByte) ++ * 1024); ++ ++ if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) ++ ReturnBW = ++ dml_min( ++ ReturnBW, ++ 4.0 * ReturnBandwidthToDCN ++ * (mode_lib->vba.ROBBufferSizeInKByte ++ - mode_lib->vba.PixelChunkSizeInKByte) ++ * 1024 ++ * mode_lib->vba.ReturnBusWidth ++ * mode_lib->vba.DCFCLK ++ * mode_lib->vba.UrgentLatencyPixelDataOnly ++ / dml_pow( ++ (ReturnBandwidthToDCN ++ * mode_lib->vba.UrgentLatencyPixelDataOnly ++ + (mode_lib->vba.ROBBufferSizeInKByte ++ - mode_lib->vba.PixelChunkSizeInKByte) ++ * 1024), ++ 2)); ++ ++ return ReturnBW; ++} ++ ++static unsigned int dscceComputeDelay( ++ unsigned int bpc, ++ double bpp, ++ unsigned int sliceWidth, ++ unsigned int numSlices, ++ enum output_format_class pixelFormat) ++{ ++ // valid bpc = source bits per component in the set of {8, 10, 12} ++ // valid bpp = increments of 1/16 of a bit ++ // min = 6/7/8 in N420/N422/444, respectively ++ // max = such that compression is 1:1 ++ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) ++ //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} ++ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} ++ ++ // fixed value ++ unsigned int rcModelSize = 8192; ++ ++ // N422/N420 operate at 2 pixels per clock ++ unsigned int 
pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, ++ Delay, pixels; ++ ++ if (pixelFormat == dm_n422 || pixelFormat == dm_420) ++ pixelsPerClock = 2; ++ // #all other modes operate at 1 pixel per clock ++ else ++ pixelsPerClock = 1; ++ ++ //initial transmit delay as per PPS ++ initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); ++ ++ //compute ssm delay ++ if (bpc == 8) ++ D = 81; ++ else if (bpc == 10) ++ D = 89; ++ else ++ D = 113; ++ ++ //divide by pixel per cycle to compute slice width as seen by DSC ++ w = sliceWidth / pixelsPerClock; ++ ++ //422 mode has an additional cycle of delay ++ if (pixelFormat == dm_s422) ++ s = 1; ++ else ++ s = 0; ++ ++ //main calculation for the dscce ++ ix = initalXmitDelay + 45; ++ wx = (w + 2) / 3; ++ p = 3 * wx - w; ++ l0 = ix / w; ++ a = ix + p * l0; ++ ax = (a + 2) / 3 + D + 6 + 1; ++ l = (ax + wx - 1) / wx; ++ if ((ix % w) == 0 && p != 0) ++ lstall = 1; ++ else ++ lstall = 0; ++ Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; ++ ++ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels ++ pixels = Delay * 3 * pixelsPerClock; ++ return pixels; ++} ++ ++static unsigned int dscComputeDelay(enum output_format_class pixelFormat) ++{ ++ unsigned int Delay = 0; ++ ++ if (pixelFormat == dm_420) { ++ // sfr ++ Delay = Delay + 2; ++ // dsccif ++ Delay = Delay + 0; ++ // dscc - input deserializer ++ Delay = Delay + 3; ++ // dscc gets pixels every other cycle ++ Delay = Delay + 2; ++ // dscc - input cdc fifo ++ Delay = Delay + 12; ++ // dscc gets pixels every other cycle ++ Delay = Delay + 13; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output cdc fifo ++ Delay = Delay + 7; ++ // dscc gets pixels every other cycle ++ Delay = Delay + 3; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output serializer ++ Delay = Delay + 1; ++ // sft ++ Delay = Delay + 1; ++ } else if (pixelFormat == dm_n422) { ++ // sfr ++ Delay = 
Delay + 2; ++ // dsccif ++ Delay = Delay + 1; ++ // dscc - input deserializer ++ Delay = Delay + 5; ++ // dscc - input cdc fifo ++ Delay = Delay + 25; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output cdc fifo ++ Delay = Delay + 10; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output serializer ++ Delay = Delay + 1; ++ // sft ++ Delay = Delay + 1; ++ } else { ++ // sfr ++ Delay = Delay + 2; ++ // dsccif ++ Delay = Delay + 0; ++ // dscc - input deserializer ++ Delay = Delay + 3; ++ // dscc - input cdc fifo ++ Delay = Delay + 12; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output cdc fifo ++ Delay = Delay + 7; ++ // dscc - output serializer ++ Delay = Delay + 1; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // sft ++ Delay = Delay + 1; ++ } ++ ++ return Delay; ++} ++ ++static bool CalculateDelayAfterScaler( ++ struct display_mode_lib *mode_lib, ++ double ReturnBW, ++ double ReadBandwidthPlaneLuma, ++ double ReadBandwidthPlaneChroma, ++ double TotalDataReadBandwidth, ++ double DisplayPipeLineDeliveryTimeLuma, ++ double DisplayPipeLineDeliveryTimeChroma, ++ double DPPCLK, ++ double DISPCLK, ++ double PixelClock, ++ unsigned int DSCDelay, ++ unsigned int DPPPerPlane, ++ bool ScalerEnabled, ++ unsigned int NumberOfCursors, ++ double DPPCLKDelaySubtotal, ++ double DPPCLKDelaySCL, ++ double DPPCLKDelaySCLLBOnly, ++ double DPPCLKDelayCNVCFormater, ++ double DPPCLKDelayCNVCCursor, ++ double DISPCLKDelaySubtotal, ++ unsigned int ScalerRecoutWidth, ++ enum output_format_class OutputFormat, ++ unsigned int HTotal, ++ unsigned int SwathWidthSingleDPPY, ++ double BytePerPixelDETY, ++ double BytePerPixelDETC, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ bool Interlace, ++ bool ProgressiveToInterlaceUnitInOPP, ++ double *DSTXAfterScaler, ++ double *DSTYAfterScaler ++ ) ++{ ++ unsigned int DPPCycles, DISPCLKCycles; ++ double DataFabricLineDeliveryTimeLuma; ++ double 
DataFabricLineDeliveryTimeChroma; ++ double DSTTotalPixelsAfterScaler; ++ ++ DataFabricLineDeliveryTimeLuma = SwathWidthSingleDPPY * SwathHeightY * dml_ceil(BytePerPixelDETY, 1) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneLuma / TotalDataReadBandwidth); ++ mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeLuma - DisplayPipeLineDeliveryTimeLuma); ++ ++ if (BytePerPixelDETC != 0) { ++ DataFabricLineDeliveryTimeChroma = SwathWidthSingleDPPY / 2 * SwathHeightC * dml_ceil(BytePerPixelDETC, 2) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneChroma / TotalDataReadBandwidth); ++ mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeChroma - DisplayPipeLineDeliveryTimeChroma); ++ } ++ ++ if (ScalerEnabled) ++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; ++ else ++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; ++ ++ DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; ++ ++ DISPCLKCycles = DISPCLKDelaySubtotal; ++ ++ if (DPPCLK == 0.0 || DISPCLK == 0.0) ++ return true; ++ ++ *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK ++ + DSCDelay; ++ ++ if (DPPPerPlane > 1) ++ *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; ++ ++ if (OutputFormat == dm_420 || (Interlace && ProgressiveToInterlaceUnitInOPP)) ++ *DSTYAfterScaler = 1; ++ else ++ *DSTYAfterScaler = 0; ++ ++ DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; ++ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); ++ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); ++ ++ return true; ++} ++ ++static bool CalculatePrefetchSchedule( ++ struct display_mode_lib *mode_lib, ++ double DPPCLK, ++ double DISPCLK, ++ double PixelClock, ++ double DCFCLKDeepSleep, ++ unsigned int DPPPerPlane, ++ unsigned 
int NumberOfCursors, ++ unsigned int VBlank, ++ unsigned int HTotal, ++ unsigned int MaxInterDCNTileRepeaters, ++ unsigned int VStartup, ++ unsigned int PageTableLevels, ++ bool GPUVMEnable, ++ bool DynamicMetadataEnable, ++ unsigned int DynamicMetadataLinesBeforeActiveRequired, ++ unsigned int DynamicMetadataTransmittedBytes, ++ bool DCCEnable, ++ double UrgentLatencyPixelDataOnly, ++ double UrgentExtraLatency, ++ double TCalc, ++ unsigned int PDEAndMetaPTEBytesFrame, ++ unsigned int MetaRowByte, ++ unsigned int PixelPTEBytesPerRow, ++ double PrefetchSourceLinesY, ++ unsigned int SwathWidthY, ++ double BytePerPixelDETY, ++ double VInitPreFillY, ++ unsigned int MaxNumSwathY, ++ double PrefetchSourceLinesC, ++ double BytePerPixelDETC, ++ double VInitPreFillC, ++ unsigned int MaxNumSwathC, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ double TWait, ++ bool XFCEnabled, ++ double XFCRemoteSurfaceFlipDelay, ++ bool InterlaceEnable, ++ bool ProgressiveToInterlaceUnitInOPP, ++ double DSTXAfterScaler, ++ double DSTYAfterScaler, ++ double *DestinationLinesForPrefetch, ++ double *PrefetchBandwidth, ++ double *DestinationLinesToRequestVMInVBlank, ++ double *DestinationLinesToRequestRowInVBlank, ++ double *VRatioPrefetchY, ++ double *VRatioPrefetchC, ++ double *RequiredPrefetchPixDataBW, ++ double *Tno_bw, ++ unsigned int *VUpdateOffsetPix, ++ double *VUpdateWidthPix, ++ double *VReadyOffsetPix) ++{ ++ bool MyError = false; ++ double TotalRepeaterDelayTime; ++ double Tdm, LineTime, Tsetup; ++ double dst_y_prefetch_equ; ++ double Tsw_oto; ++ double prefetch_bw_oto; ++ double Tvm_oto; ++ double Tr0_oto; ++ double Tpre_oto; ++ double dst_y_prefetch_oto; ++ double TimeForFetchingMetaPTE = 0; ++ double TimeForFetchingRowInVBlank = 0; ++ double LinesToRequestPrefetchPixelData = 0; ++ ++ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); ++ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); ++ *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 
12.0 / DPPCLK + TotalRepeaterDelayTime) ++ * PixelClock; ++ ++ *VReadyOffsetPix = dml_max( ++ 150.0 / DPPCLK, ++ TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK) ++ * PixelClock; ++ ++ Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; ++ ++ LineTime = (double) HTotal / PixelClock; ++ ++ if (DynamicMetadataEnable) { ++ double Tdmbf, Tdmec, Tdmsks; ++ ++ Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); ++ Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; ++ Tdmec = LineTime; ++ if (DynamicMetadataLinesBeforeActiveRequired == 0) ++ Tdmsks = VBlank * LineTime / 2.0; ++ else ++ Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; ++ if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) ++ Tdmsks = Tdmsks / 2; ++ if (VStartup * LineTime ++ < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { ++ MyError = true; ++ } ++ } else ++ Tdm = 0; ++ ++ if (GPUVMEnable) { ++ if (PageTableLevels == 4) ++ *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly; ++ else if (PageTableLevels == 3) ++ *Tno_bw = UrgentExtraLatency; ++ else ++ *Tno_bw = 0; ++ } else if (DCCEnable) ++ *Tno_bw = LineTime; ++ else ++ *Tno_bw = LineTime / 4; ++ ++ dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime ++ - (Tsetup + Tdm) / LineTime ++ - (DSTYAfterScaler + DSTXAfterScaler / HTotal); ++ ++ Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; ++ ++ prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow ++ + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) ++ + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) ++ / Tsw_oto; ++ ++ if (GPUVMEnable == true) { ++ Tvm_oto = ++ dml_max( ++ *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, ++ dml_max( ++ UrgentExtraLatency ++ + UrgentLatencyPixelDataOnly ++ * (PageTableLevels ++ - 1), ++ LineTime / 4.0)); ++ } else ++ Tvm_oto = LineTime / 4.0; ++ ++ if ((GPUVMEnable == 
true || DCCEnable == true)) { ++ Tr0_oto = dml_max( ++ (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, ++ dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4))); ++ } else ++ Tr0_oto = LineTime - Tvm_oto; ++ ++ Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; ++ ++ dst_y_prefetch_oto = Tpre_oto / LineTime; ++ ++ if (dst_y_prefetch_oto < dst_y_prefetch_equ) ++ *DestinationLinesForPrefetch = dst_y_prefetch_oto; ++ else ++ *DestinationLinesForPrefetch = dst_y_prefetch_equ; ++ ++ *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) ++ / 4; ++ ++ dml_print("DML: VStartup: %d\n", VStartup); ++ dml_print("DML: TCalc: %f\n", TCalc); ++ dml_print("DML: TWait: %f\n", TWait); ++ dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); ++ dml_print("DML: LineTime: %f\n", LineTime); ++ dml_print("DML: Tsetup: %f\n", Tsetup); ++ dml_print("DML: Tdm: %f\n", Tdm); ++ dml_print("DML: DSTYAfterScaler: %f\n", DSTYAfterScaler); ++ dml_print("DML: DSTXAfterScaler: %f\n", DSTXAfterScaler); ++ dml_print("DML: HTotal: %d\n", HTotal); ++ ++ *PrefetchBandwidth = 0; ++ *DestinationLinesToRequestVMInVBlank = 0; ++ *DestinationLinesToRequestRowInVBlank = 0; ++ *VRatioPrefetchY = 0; ++ *VRatioPrefetchC = 0; ++ *RequiredPrefetchPixDataBW = 0; ++ if (*DestinationLinesForPrefetch > 1) { ++ *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte ++ + 2 * PixelPTEBytesPerRow ++ + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) ++ + PrefetchSourceLinesC * SwathWidthY / 2 ++ * dml_ceil(BytePerPixelDETC, 2)) ++ / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); ++ if (GPUVMEnable) { ++ TimeForFetchingMetaPTE = ++ dml_max( ++ *Tno_bw ++ + (double) PDEAndMetaPTEBytesFrame ++ / *PrefetchBandwidth, ++ dml_max( ++ UrgentExtraLatency ++ + UrgentLatencyPixelDataOnly ++ * (PageTableLevels ++ - 1), ++ LineTime / 4)); ++ } else { ++ if (NumberOfCursors > 0 || XFCEnabled) ++ TimeForFetchingMetaPTE = 
LineTime / 4; ++ else ++ TimeForFetchingMetaPTE = 0.0; ++ } ++ ++ if ((GPUVMEnable == true || DCCEnable == true)) { ++ TimeForFetchingRowInVBlank = ++ dml_max( ++ (MetaRowByte + PixelPTEBytesPerRow) ++ / *PrefetchBandwidth, ++ dml_max( ++ UrgentLatencyPixelDataOnly, ++ dml_max( ++ LineTime ++ - TimeForFetchingMetaPTE, ++ LineTime ++ / 4.0))); ++ } else { ++ if (NumberOfCursors > 0 || XFCEnabled) ++ TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; ++ else ++ TimeForFetchingRowInVBlank = 0.0; ++ } ++ ++ *DestinationLinesToRequestVMInVBlank = dml_floor( ++ 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), ++ 1) / 4.0; ++ ++ *DestinationLinesToRequestRowInVBlank = dml_floor( ++ 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), ++ 1) / 4.0; ++ ++ LinesToRequestPrefetchPixelData = ++ *DestinationLinesForPrefetch ++ - ((NumberOfCursors > 0 || GPUVMEnable ++ || DCCEnable) ? ++ (*DestinationLinesToRequestVMInVBlank ++ + *DestinationLinesToRequestRowInVBlank) : ++ 0.0); ++ ++ if (LinesToRequestPrefetchPixelData > 0) { ++ ++ *VRatioPrefetchY = (double) PrefetchSourceLinesY ++ / LinesToRequestPrefetchPixelData; ++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); ++ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { ++ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { ++ *VRatioPrefetchY = ++ dml_max( ++ (double) PrefetchSourceLinesY ++ / LinesToRequestPrefetchPixelData, ++ (double) MaxNumSwathY ++ * SwathHeightY ++ / (LinesToRequestPrefetchPixelData ++ - (VInitPreFillY ++ - 3.0) ++ / 2.0)); ++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); ++ } else { ++ MyError = true; ++ *VRatioPrefetchY = 0; ++ } ++ } ++ ++ *VRatioPrefetchC = (double) PrefetchSourceLinesC ++ / LinesToRequestPrefetchPixelData; ++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); ++ ++ if ((SwathHeightC > 4)) { ++ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { ++ *VRatioPrefetchC = ++ dml_max( ++ *VRatioPrefetchC, ++ (double) MaxNumSwathC ++ * 
SwathHeightC ++ / (LinesToRequestPrefetchPixelData ++ - (VInitPreFillC ++ - 3.0) ++ / 2.0)); ++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); ++ } else { ++ MyError = true; ++ *VRatioPrefetchC = 0; ++ } ++ } ++ ++ *RequiredPrefetchPixDataBW = ++ DPPPerPlane ++ * ((double) PrefetchSourceLinesY ++ / LinesToRequestPrefetchPixelData ++ * dml_ceil( ++ BytePerPixelDETY, ++ 1) ++ + (double) PrefetchSourceLinesC ++ / LinesToRequestPrefetchPixelData ++ * dml_ceil( ++ BytePerPixelDETC, ++ 2) ++ / 2) ++ * SwathWidthY / LineTime; ++ } else { ++ MyError = true; ++ *VRatioPrefetchY = 0; ++ *VRatioPrefetchC = 0; ++ *RequiredPrefetchPixDataBW = 0; ++ } ++ ++ } else { ++ MyError = true; ++ } ++ ++ if (MyError) { ++ *PrefetchBandwidth = 0; ++ TimeForFetchingMetaPTE = 0; ++ TimeForFetchingRowInVBlank = 0; ++ *DestinationLinesToRequestVMInVBlank = 0; ++ *DestinationLinesToRequestRowInVBlank = 0; ++ *DestinationLinesForPrefetch = 0; ++ LinesToRequestPrefetchPixelData = 0; ++ *VRatioPrefetchY = 0; ++ *VRatioPrefetchC = 0; ++ *RequiredPrefetchPixDataBW = 0; ++ } ++ ++ return MyError; ++} ++ ++static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) ++{ ++ return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); ++} ++ ++static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) ++{ ++ return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); ++} ++ ++static double CalculatePrefetchSourceLines( ++ struct display_mode_lib *mode_lib, ++ double VRatio, ++ double vtaps, ++ bool Interlace, ++ bool ProgressiveToInterlaceUnitInOPP, ++ unsigned int SwathHeight, ++ unsigned int ViewportYStart, ++ double *VInitPreFill, ++ unsigned int *MaxNumSwath) ++{ ++ unsigned int MaxPartialSwath; ++ ++ if (ProgressiveToInterlaceUnitInOPP) ++ *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); ++ else ++ *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); ++ ++ if (!mode_lib->vba.IgnoreViewportPositioning) { ++ ++ *MaxNumSwath = 
dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; ++ ++ if (*VInitPreFill > 1.0) ++ MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; ++ else ++ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) ++ % SwathHeight; ++ MaxPartialSwath = dml_max(1U, MaxPartialSwath); ++ ++ } else { ++ ++ if (ViewportYStart != 0) ++ dml_print( ++ "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); ++ ++ *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); ++ ++ if (*VInitPreFill > 1.0) ++ MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; ++ else ++ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) ++ % SwathHeight; ++ } ++ ++ return *MaxNumSwath * SwathHeight + MaxPartialSwath; ++} ++ ++static unsigned int CalculateVMAndRowBytes( ++ struct display_mode_lib *mode_lib, ++ bool DCCEnable, ++ unsigned int BlockHeight256Bytes, ++ unsigned int BlockWidth256Bytes, ++ enum source_format_class SourcePixelFormat, ++ unsigned int SurfaceTiling, ++ unsigned int BytePerPixel, ++ enum scan_direction_class ScanDirection, ++ unsigned int ViewportWidth, ++ unsigned int ViewportHeight, ++ unsigned int SwathWidth, ++ bool GPUVMEnable, ++ unsigned int VMMPageSize, ++ unsigned int PTEBufferSizeInRequestsLuma, ++ unsigned int PDEProcessingBufIn64KBReqs, ++ unsigned int Pitch, ++ unsigned int DCCMetaPitch, ++ unsigned int *MacroTileWidth, ++ unsigned int *MetaRowByte, ++ unsigned int *PixelPTEBytesPerRow, ++ bool *PTEBufferSizeNotExceeded, ++ unsigned int *dpte_row_height, ++ unsigned int *meta_row_height) ++{ ++ unsigned int MetaRequestHeight; ++ unsigned int MetaRequestWidth; ++ unsigned int MetaSurfWidth; ++ unsigned int MetaSurfHeight; ++ unsigned int MPDEBytesFrame; ++ unsigned int MetaPTEBytesFrame; ++ unsigned int DCCMetaSurfaceBytes; ++ ++ unsigned int MacroTileSizeBytes; ++ unsigned int MacroTileHeight; ++ unsigned int DPDE0BytesFrame; 
++ unsigned int ExtraDPDEBytesFrame; ++ unsigned int PDEAndMetaPTEBytesFrame; ++ ++ if (DCCEnable == true) { ++ MetaRequestHeight = 8 * BlockHeight256Bytes; ++ MetaRequestWidth = 8 * BlockWidth256Bytes; ++ if (ScanDirection == dm_horz) { ++ *meta_row_height = MetaRequestHeight; ++ MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) ++ + MetaRequestWidth; ++ *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; ++ } else { ++ *meta_row_height = MetaRequestWidth; ++ MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) ++ + MetaRequestHeight; ++ *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; ++ } ++ if (ScanDirection == dm_horz) { ++ DCCMetaSurfaceBytes = DCCMetaPitch ++ * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) ++ + 64 * BlockHeight256Bytes) * BytePerPixel ++ / 256; ++ } else { ++ DCCMetaSurfaceBytes = DCCMetaPitch ++ * (dml_ceil( ++ (double) ViewportHeight - 1, ++ 64 * BlockHeight256Bytes) ++ + 64 * BlockHeight256Bytes) * BytePerPixel ++ / 256; ++ } ++ if (GPUVMEnable == true) { ++ MetaPTEBytesFrame = (dml_ceil( ++ (double) (DCCMetaSurfaceBytes - VMMPageSize) ++ / (8 * VMMPageSize), ++ 1) + 1) * 64; ++ MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1); ++ } else { ++ MetaPTEBytesFrame = 0; ++ MPDEBytesFrame = 0; ++ } ++ } else { ++ MetaPTEBytesFrame = 0; ++ MPDEBytesFrame = 0; ++ *MetaRowByte = 0; ++ } ++ ++ if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { ++ MacroTileSizeBytes = 256; ++ MacroTileHeight = BlockHeight256Bytes; ++ } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x ++ || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { ++ MacroTileSizeBytes = 4096; ++ MacroTileHeight = 4 * BlockHeight256Bytes; ++ } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t ++ || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == 
dm_sw_64kb_d ++ || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x ++ || SurfaceTiling == dm_sw_64kb_r_x) { ++ MacroTileSizeBytes = 65536; ++ MacroTileHeight = 16 * BlockHeight256Bytes; ++ } else { ++ MacroTileSizeBytes = 262144; ++ MacroTileHeight = 32 * BlockHeight256Bytes; ++ } ++ *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; ++ ++ if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) { ++ if (ScanDirection == dm_horz) { ++ DPDE0BytesFrame = ++ 64 ++ * (dml_ceil( ++ ((Pitch ++ * (dml_ceil( ++ ViewportHeight ++ - 1, ++ MacroTileHeight) ++ + MacroTileHeight) ++ * BytePerPixel) ++ - MacroTileSizeBytes) ++ / (8 ++ * 2097152), ++ 1) + 1); ++ } else { ++ DPDE0BytesFrame = ++ 64 ++ * (dml_ceil( ++ ((Pitch ++ * (dml_ceil( ++ (double) SwathWidth ++ - 1, ++ MacroTileHeight) ++ + MacroTileHeight) ++ * BytePerPixel) ++ - MacroTileSizeBytes) ++ / (8 ++ * 2097152), ++ 1) + 1); ++ } ++ ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2); ++ } else { ++ DPDE0BytesFrame = 0; ++ ExtraDPDEBytesFrame = 0; ++ } ++ ++ PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame ++ + ExtraDPDEBytesFrame; ++ ++ if (GPUVMEnable == true) { ++ unsigned int PTERequestSize; ++ unsigned int PixelPTEReqHeight; ++ unsigned int PixelPTEReqWidth; ++ double FractionOfPTEReturnDrop; ++ unsigned int EffectivePDEProcessingBufIn64KBReqs; ++ ++ if (SurfaceTiling == dm_sw_linear) { ++ PixelPTEReqHeight = 1; ++ PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; ++ PTERequestSize = 64; ++ FractionOfPTEReturnDrop = 0; ++ } else if (MacroTileSizeBytes == 4096) { ++ PixelPTEReqHeight = MacroTileHeight; ++ PixelPTEReqWidth = 8 * *MacroTileWidth; ++ PTERequestSize = 64; ++ if (ScanDirection == dm_horz) ++ FractionOfPTEReturnDrop = 0; ++ else ++ FractionOfPTEReturnDrop = 7 / 8; ++ } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { ++ PixelPTEReqHeight = 16 * BlockHeight256Bytes; ++ PixelPTEReqWidth = 
16 * BlockWidth256Bytes; ++ PTERequestSize = 128; ++ FractionOfPTEReturnDrop = 0; ++ } else { ++ PixelPTEReqHeight = MacroTileHeight; ++ PixelPTEReqWidth = 8 * *MacroTileWidth; ++ PTERequestSize = 64; ++ FractionOfPTEReturnDrop = 0; ++ } ++ ++ if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) ++ EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; ++ else ++ EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; ++ ++ if (SurfaceTiling == dm_sw_linear) { ++ *dpte_row_height = ++ dml_min( ++ 128, ++ 1 ++ << (unsigned int) dml_floor( ++ dml_log2( ++ dml_min( ++ (double) PTEBufferSizeInRequestsLuma ++ * PixelPTEReqWidth, ++ EffectivePDEProcessingBufIn64KBReqs ++ * 65536.0 ++ / BytePerPixel) ++ / Pitch), ++ 1)); ++ *PixelPTEBytesPerRow = PTERequestSize ++ * (dml_ceil( ++ (double) (Pitch * *dpte_row_height - 1) ++ / PixelPTEReqWidth, ++ 1) + 1); ++ } else if (ScanDirection == dm_horz) { ++ *dpte_row_height = PixelPTEReqHeight; ++ *PixelPTEBytesPerRow = PTERequestSize ++ * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) ++ + 1); ++ } else { ++ *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); ++ *PixelPTEBytesPerRow = PTERequestSize ++ * (dml_ceil( ++ ((double) SwathWidth - 1) ++ / PixelPTEReqHeight, ++ 1) + 1); ++ } ++ if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) ++ <= 64 * PTEBufferSizeInRequestsLuma) { ++ *PTEBufferSizeNotExceeded = true; ++ } else { ++ *PTEBufferSizeNotExceeded = false; ++ } ++ } else { ++ *PixelPTEBytesPerRow = 0; ++ *PTEBufferSizeNotExceeded = true; ++ } ++ ++ return PDEAndMetaPTEBytesFrame; ++} ++ ++static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( ++ struct display_mode_lib *mode_lib) ++{ ++ unsigned int j, k; ++ ++ mode_lib->vba.WritebackDISPCLK = 0.0; ++ mode_lib->vba.DISPCLKWithRamping = 0; ++ mode_lib->vba.DISPCLKWithoutRamping = 0; ++ mode_lib->vba.GlobalDPPCLK = 0.0; ++ ++ // dml_ml->vba.DISPCLK and 
dml_ml->vba.DPPCLK Calculation ++ // ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.WritebackEnable[k]) { ++ mode_lib->vba.WritebackDISPCLK = ++ dml_max( ++ mode_lib->vba.WritebackDISPCLK, ++ CalculateWriteBackDISPCLK( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k], ++ mode_lib->vba.HTotal[k], ++ mode_lib->vba.WritebackChromaLineBufferWidth)); ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.HRatio[k] > 1) { ++ mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / dml_ceil( ++ mode_lib->vba.htaps[k] ++ / 6.0, ++ 1)); ++ } else { ++ mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma = ++ mode_lib->vba.PixelClock[k] ++ * dml_max( ++ mode_lib->vba.vtaps[k] / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k]), ++ dml_max( ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], ++ 1.0)); ++ ++ if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) ++ && mode_lib->vba.DPPCLKUsingSingleDPPLuma ++ < 2 * mode_lib->vba.PixelClock[k]) { ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; ++ } ++ ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { ++ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; ++ mode_lib->vba.DPPCLKUsingSingleDPP[k] = ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma; ++ } else { ++ if 
(mode_lib->vba.HRatio[k] > 1) { ++ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = ++ dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / 2 ++ / dml_ceil( ++ mode_lib->vba.HTAPsChroma[k] ++ / 6.0, ++ 1.0)); ++ } else { ++ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ mode_lib->vba.DPPCLKUsingSingleDPPChroma = ++ mode_lib->vba.PixelClock[k] ++ * dml_max( ++ mode_lib->vba.VTAPsChroma[k] ++ / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k] ++ / 2), ++ dml_max( ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / 4 ++ / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], ++ 1.0)); ++ ++ if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) ++ && mode_lib->vba.DPPCLKUsingSingleDPPChroma ++ < 2 * mode_lib->vba.PixelClock[k]) { ++ mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 ++ * mode_lib->vba.PixelClock[k]; ++ } ++ ++ mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma, ++ mode_lib->vba.DPPCLKUsingSingleDPPChroma); ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.BlendingAndTiming[k] != k) ++ continue; ++ if (mode_lib->vba.ODMCombineEnabled[k]) { ++ mode_lib->vba.DISPCLKWithRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.PixelClock[k] / 2 ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100) ++ * (1 ++ + mode_lib->vba.DISPCLKRampingMargin ++ / 100)); ++ mode_lib->vba.DISPCLKWithoutRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.PixelClock[k] / 2 ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100)); ++ } else if (!mode_lib->vba.ODMCombineEnabled[k]) { ++ mode_lib->vba.DISPCLKWithRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.PixelClock[k] ++ * (1 ++ + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100) ++ * (1 ++ + mode_lib->vba.DISPCLKRampingMargin ++ / 100)); ++ mode_lib->vba.DISPCLKWithoutRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.PixelClock[k] ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100)); ++ } ++ } ++ ++ mode_lib->vba.DISPCLKWithRamping = dml_max( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.WritebackDISPCLK); ++ mode_lib->vba.DISPCLKWithoutRamping = dml_max( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.WritebackDISPCLK); ++ ++ ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); ++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( ++ mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity ++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { ++ mode_lib->vba.DISPCLK_calculated = ++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; ++ } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity ++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { ++ mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; ++ } else { ++ mode_lib->vba.DISPCLK_calculated = ++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; ++ } ++ DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.DPPPerPlane[k] == 0) { ++ mode_lib->vba.DPPCLK_calculated[k] = 0; ++ } else { ++ mode_lib->vba.DPPCLK_calculated[k] = 
mode_lib->vba.DPPCLKUsingSingleDPP[k] ++ / mode_lib->vba.DPPPerPlane[k] ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); ++ } ++ mode_lib->vba.GlobalDPPCLK = dml_max( ++ mode_lib->vba.GlobalDPPCLK, ++ mode_lib->vba.DPPCLK_calculated[k]); ++ } ++ mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( ++ mode_lib->vba.GlobalDPPCLK, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 ++ * dml_ceil( ++ mode_lib->vba.DPPCLK_calculated[k] * 255 ++ / mode_lib->vba.GlobalDPPCLK, ++ 1); ++ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); ++ } ++ ++ // Urgent Watermark ++ mode_lib->vba.DCCEnabledAnyPlane = false; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) ++ if (mode_lib->vba.DCCEnable[k]) ++ mode_lib->vba.DCCEnabledAnyPlane = true; ++ ++ mode_lib->vba.ReturnBandwidthToDCN = dml_min( ++ mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, ++ mode_lib->vba.FabricAndDRAMBandwidth * 1000) ++ * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; ++ ++ mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; ++ mode_lib->vba.ReturnBW = adjust_ReturnBW( ++ mode_lib, ++ mode_lib->vba.ReturnBW, ++ mode_lib->vba.DCCEnabledAnyPlane, ++ mode_lib->vba.ReturnBandwidthToDCN); ++ ++ // Let's do this calculation again?? 
++ mode_lib->vba.ReturnBandwidthToDCN = dml_min( ++ mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, ++ mode_lib->vba.FabricAndDRAMBandwidth * 1000); ++ mode_lib->vba.ReturnBW = adjust_ReturnBW( ++ mode_lib, ++ mode_lib->vba.ReturnBW, ++ mode_lib->vba.DCCEnabledAnyPlane, ++ mode_lib->vba.ReturnBandwidthToDCN); ++ ++ DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); ++ DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); ++ DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ bool MainPlaneDoesODMCombine = false; ++ ++ if (mode_lib->vba.SourceScan[k] == dm_horz) ++ mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; ++ else ++ mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; ++ ++ if (mode_lib->vba.ODMCombineEnabled[k] == true) ++ MainPlaneDoesODMCombine = true; ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) ++ if (mode_lib->vba.BlendingAndTiming[k] == j ++ && mode_lib->vba.ODMCombineEnabled[j] == true) ++ MainPlaneDoesODMCombine = true; ++ ++ if (MainPlaneDoesODMCombine == true) ++ mode_lib->vba.SwathWidthY[k] = dml_min( ++ (double) mode_lib->vba.SwathWidthSingleDPPY[k], ++ dml_round( ++ mode_lib->vba.HActive[k] / 2.0 ++ * mode_lib->vba.HRatio[k])); ++ else { ++ if (mode_lib->vba.DPPPerPlane[k] == 0) { ++ mode_lib->vba.SwathWidthY[k] = 0; ++ } else { ++ mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] ++ / mode_lib->vba.DPPPerPlane[k]; ++ } ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { ++ mode_lib->vba.BytePerPixelDETY[k] = 8; ++ mode_lib->vba.BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { ++ mode_lib->vba.BytePerPixelDETY[k] = 4; ++ mode_lib->vba.BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { ++ mode_lib->vba.BytePerPixelDETY[k] 
= 2; ++ mode_lib->vba.BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { ++ mode_lib->vba.BytePerPixelDETY[k] = 1; ++ mode_lib->vba.BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { ++ mode_lib->vba.BytePerPixelDETY[k] = 1; ++ mode_lib->vba.BytePerPixelDETC[k] = 2; ++ } else { // dm_420_10 ++ mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; ++ mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; ++ } ++ } ++ ++ mode_lib->vba.TotalDataReadBandwidth = 0.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] ++ * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ * mode_lib->vba.VRatio[k]; ++ mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] ++ / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ * mode_lib->vba.VRatio[k] / 2; ++ DTRACE( ++ " read_bw[%i] = %fBps", ++ k, ++ mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]); ++ mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]; ++ } ++ ++ mode_lib->vba.TotalDCCActiveDPP = 0; ++ mode_lib->vba.TotalActiveDPP = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP ++ + mode_lib->vba.DPPPerPlane[k]; ++ if (mode_lib->vba.DCCEnable[k]) ++ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP ++ + mode_lib->vba.DPPPerPlane[k]; ++ } ++ ++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = ++ (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK ++ + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly ++ * mode_lib->vba.NumberOfChannels ++ / mode_lib->vba.ReturnBW; ++ ++ 
mode_lib->vba.LastPixelOfLineExtraWatermark = 0; ++ ++ mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency ++ + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte ++ + mode_lib->vba.TotalDCCActiveDPP ++ * mode_lib->vba.MetaChunkSize) * 1024.0 ++ / mode_lib->vba.ReturnBW; ++ ++ if (mode_lib->vba.GPUVMEnable) ++ mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP ++ * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW; ++ ++ mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly ++ + mode_lib->vba.LastPixelOfLineExtraWatermark ++ + mode_lib->vba.UrgentExtraLatency; ++ ++ DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); ++ DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); ++ ++ mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; ++ ++ mode_lib->vba.TotalActiveWriteback = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.WritebackEnable[k]) ++ mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k]; ++ } ++ ++ if (mode_lib->vba.TotalActiveWriteback <= 1) ++ mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; ++ else ++ mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency ++ + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 ++ / mode_lib->vba.SOCCLK; ++ ++ DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); ++ ++ // NB P-State/DRAM Clock Change Watermark ++ mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency ++ + mode_lib->vba.UrgentWatermark; ++ ++ DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); ++ ++ DTRACE(" calculating wb pstate watermark"); ++ DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); ++ DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); ++ ++ if (mode_lib->vba.TotalActiveWriteback <= 1) ++ 
mode_lib->vba.WritebackDRAMClockChangeWatermark = ++ mode_lib->vba.DRAMClockChangeLatency ++ + mode_lib->vba.WritebackLatency; ++ else ++ mode_lib->vba.WritebackDRAMClockChangeWatermark = ++ mode_lib->vba.DRAMClockChangeLatency ++ + mode_lib->vba.WritebackLatency ++ + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 ++ / mode_lib->vba.SOCCLK; ++ ++ DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); ++ ++ // Stutter Efficiency ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] ++ / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; ++ mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( ++ mode_lib->vba.LinesInDETY[k], ++ mode_lib->vba.SwathHeightY[k]); ++ mode_lib->vba.FullDETBufferingTimeY[k] = ++ mode_lib->vba.LinesInDETYRoundedDownToSwath[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) ++ / mode_lib->vba.VRatio[k]; ++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) { ++ mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] ++ / mode_lib->vba.BytePerPixelDETC[k] ++ / (mode_lib->vba.SwathWidthY[k] / 2); ++ mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( ++ mode_lib->vba.LinesInDETC[k], ++ mode_lib->vba.SwathHeightC[k]); ++ mode_lib->vba.FullDETBufferingTimeC[k] = ++ mode_lib->vba.LinesInDETCRoundedDownToSwath[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) ++ / (mode_lib->vba.VRatio[k] / 2); ++ } else { ++ mode_lib->vba.LinesInDETC[k] = 0; ++ mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; ++ mode_lib->vba.FullDETBufferingTimeC[k] = 999999; ++ } ++ } ++ ++ mode_lib->vba.MinFullDETBufferingTime = 999999.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.FullDETBufferingTimeY[k] ++ < mode_lib->vba.MinFullDETBufferingTime) { ++ mode_lib->vba.MinFullDETBufferingTime = ++ mode_lib->vba.FullDETBufferingTimeY[k]; ++ 
mode_lib->vba.FrameTimeForMinFullDETBufferingTime = ++ (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ } ++ if (mode_lib->vba.FullDETBufferingTimeC[k] ++ < mode_lib->vba.MinFullDETBufferingTime) { ++ mode_lib->vba.MinFullDETBufferingTime = ++ mode_lib->vba.FullDETBufferingTimeC[k]; ++ mode_lib->vba.FrameTimeForMinFullDETBufferingTime = ++ (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ } ++ } ++ ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.DCCEnable[k]) { ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond = ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond ++ + mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ / mode_lib->vba.DCCRate[k] ++ / 1000 ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k] ++ / mode_lib->vba.DCCRate[k] ++ / 1000; ++ } else { ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond = ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond ++ + mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ / 1000 ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k] ++ / 1000; ++ } ++ if (mode_lib->vba.DCCEnable[k]) { ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond = ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond ++ + mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ / 1000 / 256 ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k] ++ / 1000 / 256; ++ } ++ if (mode_lib->vba.GPUVMEnable) { ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond = ++ mode_lib->vba.AverageReadBandwidthGBytePerSecond ++ + mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ / 1000 / 512 ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k] ++ / 1000 / 512; ++ } ++ } ++ ++ mode_lib->vba.PartOfBurstThatFitsInROB = ++ dml_min( ++ mode_lib->vba.MinFullDETBufferingTime ++ * mode_lib->vba.TotalDataReadBandwidth, ++ mode_lib->vba.ROBBufferSizeInKByte * 1024 ++ * mode_lib->vba.TotalDataReadBandwidth ++ / 
(mode_lib->vba.AverageReadBandwidthGBytePerSecond ++ * 1000)); ++ mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB ++ * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) ++ / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW ++ + (mode_lib->vba.MinFullDETBufferingTime ++ * mode_lib->vba.TotalDataReadBandwidth ++ - mode_lib->vba.PartOfBurstThatFitsInROB) ++ / (mode_lib->vba.DCFCLK * 64); ++ if (mode_lib->vba.TotalActiveWriteback == 0) { ++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 ++ - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) ++ / mode_lib->vba.MinFullDETBufferingTime) * 100; ++ } else { ++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; ++ } ++ ++ mode_lib->vba.SmallestVBlank = 999999; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { ++ mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] ++ - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ } else { ++ mode_lib->vba.VBlankTime = 0; ++ } ++ mode_lib->vba.SmallestVBlank = dml_min( ++ mode_lib->vba.SmallestVBlank, ++ mode_lib->vba.VBlankTime); ++ } ++ ++ mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 ++ * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime ++ - mode_lib->vba.SmallestVBlank) ++ + mode_lib->vba.SmallestVBlank) ++ / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; ++ ++ // dml_ml->vba.DCFCLK Deep Sleep ++ mode_lib->vba.DCFCLKDeepSleep = 8.0; ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { ++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) { ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = ++ dml_max( ++ 1.1 * mode_lib->vba.SwathWidthY[k] ++ * dml_ceil( ++ mode_lib->vba.BytePerPixelDETY[k], ++ 1) / 32 ++ / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], ++ 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 ++ * dml_ceil( ++ 
mode_lib->vba.BytePerPixelDETC[k], ++ 2) / 32 ++ / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); ++ } else ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k] ++ * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 ++ / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k], ++ mode_lib->vba.PixelClock[k] / 16.0); ++ mode_lib->vba.DCFCLKDeepSleep = dml_max( ++ mode_lib->vba.DCFCLKDeepSleep, ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); ++ ++ DTRACE( ++ " dcfclk_deepsleep_per_plane[%i] = %fMHz", ++ k, ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); ++ } ++ ++ DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep); ++ ++ // Stutter Watermark ++ mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime ++ + mode_lib->vba.LastPixelOfLineExtraWatermark ++ + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep; ++ mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime ++ + mode_lib->vba.LastPixelOfLineExtraWatermark ++ + mode_lib->vba.UrgentExtraLatency; ++ ++ DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); ++ DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); ++ ++ // Urgent Latency Supported ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.EffectiveDETPlusLBLinesLuma = ++ dml_floor( ++ mode_lib->vba.LinesInDETY[k] ++ + dml_min( ++ mode_lib->vba.LinesInDETY[k] ++ * mode_lib->vba.DPPCLK[k] ++ * mode_lib->vba.BytePerPixelDETY[k] ++ * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] ++ / (mode_lib->vba.ReturnBW ++ / mode_lib->vba.DPPPerPlane[k]), ++ (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), ++ mode_lib->vba.SwathHeightY[k]); ++ ++ mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma ++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ / 
mode_lib->vba.VRatio[k] ++ - mode_lib->vba.EffectiveDETPlusLBLinesLuma ++ * mode_lib->vba.SwathWidthY[k] ++ * mode_lib->vba.BytePerPixelDETY[k] ++ / (mode_lib->vba.ReturnBW ++ / mode_lib->vba.DPPPerPlane[k]); ++ ++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) { ++ mode_lib->vba.EffectiveDETPlusLBLinesChroma = ++ dml_floor( ++ mode_lib->vba.LinesInDETC[k] ++ + dml_min( ++ mode_lib->vba.LinesInDETC[k] ++ * mode_lib->vba.DPPCLK[k] ++ * mode_lib->vba.BytePerPixelDETC[k] ++ * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] ++ / (mode_lib->vba.ReturnBW ++ / mode_lib->vba.DPPPerPlane[k]), ++ (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), ++ mode_lib->vba.SwathHeightC[k]); ++ mode_lib->vba.UrgentLatencySupportUsChroma = ++ mode_lib->vba.EffectiveDETPlusLBLinesChroma ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) ++ / (mode_lib->vba.VRatio[k] / 2) ++ - mode_lib->vba.EffectiveDETPlusLBLinesChroma ++ * (mode_lib->vba.SwathWidthY[k] ++ / 2) ++ * mode_lib->vba.BytePerPixelDETC[k] ++ / (mode_lib->vba.ReturnBW ++ / mode_lib->vba.DPPPerPlane[k]); ++ mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( ++ mode_lib->vba.UrgentLatencySupportUsLuma, ++ mode_lib->vba.UrgentLatencySupportUsChroma); ++ } else { ++ mode_lib->vba.UrgentLatencySupportUs[k] = ++ mode_lib->vba.UrgentLatencySupportUsLuma; ++ } ++ } ++ ++ mode_lib->vba.MinUrgentLatencySupportUs = 999999; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.MinUrgentLatencySupportUs = dml_min( ++ mode_lib->vba.MinUrgentLatencySupportUs, ++ mode_lib->vba.UrgentLatencySupportUs[k]); ++ } ++ ++ // Non-Urgent Latency Tolerance ++ mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs ++ - mode_lib->vba.UrgentWatermark; ++ ++ // DSCCLK ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { ++ mode_lib->vba.DSCCLK_calculated[k] = 0.0; ++ } else { ++ if 
(mode_lib->vba.OutputFormat[k] == dm_420 ++ || mode_lib->vba.OutputFormat[k] == dm_n422) ++ mode_lib->vba.DSCFormatFactor = 2; ++ else ++ mode_lib->vba.DSCFormatFactor = 1; ++ if (mode_lib->vba.ODMCombineEnabled[k]) ++ mode_lib->vba.DSCCLK_calculated[k] = ++ mode_lib->vba.PixelClockBackEnd[k] / 6 ++ / mode_lib->vba.DSCFormatFactor ++ / (1 ++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100); ++ else ++ mode_lib->vba.DSCCLK_calculated[k] = ++ mode_lib->vba.PixelClockBackEnd[k] / 3 ++ / mode_lib->vba.DSCFormatFactor ++ / (1 ++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100); ++ } ++ } ++ ++ // DSC Delay ++ // TODO ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ double bpp = mode_lib->vba.OutputBpp[k]; ++ unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; ++ ++ if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { ++ if (!mode_lib->vba.ODMCombineEnabled[k]) { ++ mode_lib->vba.DSCDelay[k] = ++ dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ bpp, ++ dml_ceil( ++ (double) mode_lib->vba.HActive[k] ++ / mode_lib->vba.NumberOfDSCSlices[k], ++ 1), ++ slices, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay( ++ mode_lib->vba.OutputFormat[k]); ++ } else { ++ mode_lib->vba.DSCDelay[k] = ++ 2 ++ * (dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ bpp, ++ dml_ceil( ++ (double) mode_lib->vba.HActive[k] ++ / mode_lib->vba.NumberOfDSCSlices[k], ++ 1), ++ slices / 2.0, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay( ++ mode_lib->vba.OutputFormat[k])); ++ } ++ mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] ++ * mode_lib->vba.PixelClock[k] ++ / mode_lib->vba.PixelClockBackEnd[k]; ++ } else { ++ mode_lib->vba.DSCDelay[k] = 0; ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes ++ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j ++ && mode_lib->vba.DSCEnabled[j]) ++ mode_lib->vba.DSCDelay[k] 
= mode_lib->vba.DSCDelay[j]; ++ ++ // Prefetch ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ unsigned int PDEAndMetaPTEBytesFrameY; ++ unsigned int PixelPTEBytesPerRowY; ++ unsigned int MetaRowByteY; ++ unsigned int MetaRowByteC; ++ unsigned int PDEAndMetaPTEBytesFrameC; ++ unsigned int PixelPTEBytesPerRowC; ++ ++ Calculate256BBlockSizes( ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), ++ dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), ++ &mode_lib->vba.BlockHeight256BytesY[k], ++ &mode_lib->vba.BlockHeight256BytesC[k], ++ &mode_lib->vba.BlockWidth256BytesY[k], ++ &mode_lib->vba.BlockWidth256BytesC[k]); ++ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.BlockHeight256BytesY[k], ++ mode_lib->vba.BlockWidth256BytesY[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k], ++ mode_lib->vba.ViewportHeight[k], ++ mode_lib->vba.SwathWidthY[k], ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.VMMPageSize, ++ mode_lib->vba.PTEBufferSizeInRequestsLuma, ++ mode_lib->vba.PDEProcessingBufIn64KBReqs, ++ mode_lib->vba.PitchY[k], ++ mode_lib->vba.DCCMetaPitchY[k], ++ &mode_lib->vba.MacroTileWidthY[k], ++ &MetaRowByteY, ++ &PixelPTEBytesPerRowY, ++ &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], ++ &mode_lib->vba.dpte_row_height[k], ++ &mode_lib->vba.meta_row_height[k]); ++ mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.vtaps[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.ViewportYStartY[k], ++ &mode_lib->vba.VInitPreFillY[k], ++ &mode_lib->vba.MaxNumSwathY[k]); ++ ++ if 
((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { ++ PDEAndMetaPTEBytesFrameC = ++ CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.BlockHeight256BytesC[k], ++ mode_lib->vba.BlockWidth256BytesC[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil( ++ mode_lib->vba.BytePerPixelDETC[k], ++ 2), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k] / 2, ++ mode_lib->vba.ViewportHeight[k] / 2, ++ mode_lib->vba.SwathWidthY[k] / 2, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.VMMPageSize, ++ mode_lib->vba.PTEBufferSizeInRequestsLuma, ++ mode_lib->vba.PDEProcessingBufIn64KBReqs, ++ mode_lib->vba.PitchC[k], ++ 0, ++ &mode_lib->vba.MacroTileWidthC[k], ++ &MetaRowByteC, ++ &PixelPTEBytesPerRowC, ++ &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], ++ &mode_lib->vba.dpte_row_height_chroma[k], ++ &mode_lib->vba.meta_row_height_chroma[k]); ++ mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k] / 2, ++ mode_lib->vba.VTAPsChroma[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.SwathHeightC[k], ++ mode_lib->vba.ViewportYStartC[k], ++ &mode_lib->vba.VInitPreFillC[k], ++ &mode_lib->vba.MaxNumSwathC[k]); ++ } else { ++ PixelPTEBytesPerRowC = 0; ++ PDEAndMetaPTEBytesFrameC = 0; ++ MetaRowByteC = 0; ++ mode_lib->vba.MaxNumSwathC[k] = 0; ++ mode_lib->vba.PrefetchSourceLinesC[k] = 0; ++ } ++ ++ mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; ++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY ++ + PDEAndMetaPTEBytesFrameC; ++ mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; ++ ++ CalculateActiveRowBandwidth( ++ mode_lib->vba.GPUVMEnable, ++ 
mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ MetaRowByteY, ++ MetaRowByteC, ++ mode_lib->vba.meta_row_height[k], ++ mode_lib->vba.meta_row_height_chroma[k], ++ PixelPTEBytesPerRowY, ++ PixelPTEBytesPerRowC, ++ mode_lib->vba.dpte_row_height[k], ++ mode_lib->vba.dpte_row_height_chroma[k], ++ &mode_lib->vba.meta_row_bw[k], ++ &mode_lib->vba.dpte_row_bw[k], ++ &mode_lib->vba.qual_row_bw[k]); ++ } ++ ++ mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = ++ mode_lib->vba.WritebackLatency ++ + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k]) ++ / mode_lib->vba.DISPCLK; ++ } else ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { ++ if (mode_lib->vba.BlendingAndTiming[j] == k ++ && mode_lib->vba.WritebackEnable[j] == true) { ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = ++ dml_max( ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], ++ mode_lib->vba.WritebackLatency ++ + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[j], ++ mode_lib->vba.WritebackHRatio[j], ++ mode_lib->vba.WritebackVRatio[j], ++ mode_lib->vba.WritebackLumaHTaps[j], ++ mode_lib->vba.WritebackLumaVTaps[j], ++ mode_lib->vba.WritebackChromaHTaps[j], ++ mode_lib->vba.WritebackChromaVTaps[j], ++ mode_lib->vba.WritebackDestinationWidth[j]) ++ / 
mode_lib->vba.DISPCLK); ++ } ++ } ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) ++ if (mode_lib->vba.BlendingAndTiming[k] == j) ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; ++ ++ mode_lib->vba.VStartupLines = 13; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.MaxVStartupLines[k] = ++ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] ++ - dml_max( ++ 1.0, ++ dml_ceil( ++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1)); ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) ++ mode_lib->vba.MaximumMaxVStartupLines = dml_max( ++ mode_lib->vba.MaximumMaxVStartupLines, ++ mode_lib->vba.MaxVStartupLines[k]); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.cursor_bw[k] = 0.0; ++ for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) ++ mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] ++ * mode_lib->vba.CursorBPP[k][j] / 8.0 ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ * mode_lib->vba.VRatio[k]; ++ } ++ ++ do { ++ double MaxTotalRDBandwidth = 0; ++ bool DestinationLineTimesForPrefetchLessThan2 = false; ++ bool VRatioPrefetchMoreThan4 = false; ++ bool prefetch_vm_bw_valid = true; ++ bool prefetch_row_bw_valid = true; ++ double TWait = CalculateTWait( ++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.SREnterPlusExitTime); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.XFCEnabled[k] == true) { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = ++ CalculateRemoteSurfaceFlipDelay( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ 
mode_lib->vba.SwathWidthY[k], ++ dml_ceil( ++ mode_lib->vba.BytePerPixelDETY[k], ++ 1), ++ mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.XFCTSlvVupdateOffset, ++ mode_lib->vba.XFCTSlvVupdateWidth, ++ mode_lib->vba.XFCTSlvVreadyOffset, ++ mode_lib->vba.XFCXBUFLatencyTolerance, ++ mode_lib->vba.XFCFillBWOverhead, ++ mode_lib->vba.XFCSlvChunkSize, ++ mode_lib->vba.XFCBusTransportTime, ++ mode_lib->vba.TCalc, ++ TWait, ++ &mode_lib->vba.SrcActiveDrainRate, ++ &mode_lib->vba.TInitXFill, ++ &mode_lib->vba.TslvChk); ++ } else { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; ++ } ++ ++ CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBW, mode_lib->vba.ReadBandwidthPlaneLuma[k], mode_lib->vba.ReadBandwidthPlaneChroma[k], mode_lib->vba.TotalDataReadBandwidth, ++ mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], ++ mode_lib->vba.DPPCLK[k], mode_lib->vba.DISPCLK, mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelay[k], mode_lib->vba.DPPPerPlane[k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], ++ mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, ++ mode_lib->vba.SwathWidthY[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], ++ mode_lib->vba.SwathWidthSingleDPPY[k], mode_lib->vba.BytePerPixelDETY[k], mode_lib->vba.BytePerPixelDETC[k], mode_lib->vba.SwathHeightY[k], mode_lib->vba.SwathHeightC[k], mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); ++ ++ mode_lib->vba.ErrorResult[k] = ++ CalculatePrefetchSchedule( ++ mode_lib, ++ mode_lib->vba.DPPCLK[k], ++ mode_lib->vba.DISPCLK, ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.DCFCLKDeepSleep, ++ mode_lib->vba.DPPPerPlane[k], ++ 
mode_lib->vba.NumberOfCursors[k], ++ mode_lib->vba.VTotal[k] ++ - mode_lib->vba.VActive[k], ++ mode_lib->vba.HTotal[k], ++ mode_lib->vba.MaxInterDCNTileRepeaters, ++ dml_min( ++ mode_lib->vba.VStartupLines, ++ mode_lib->vba.MaxVStartupLines[k]), ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.DynamicMetadataEnable[k], ++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], ++ mode_lib->vba.DynamicMetadataTransmittedBytes[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.UrgentExtraLatency, ++ mode_lib->vba.TCalc, ++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k], ++ mode_lib->vba.MetaRowByte[k], ++ mode_lib->vba.PixelPTEBytesPerRow[k], ++ mode_lib->vba.PrefetchSourceLinesY[k], ++ mode_lib->vba.SwathWidthY[k], ++ mode_lib->vba.BytePerPixelDETY[k], ++ mode_lib->vba.VInitPreFillY[k], ++ mode_lib->vba.MaxNumSwathY[k], ++ mode_lib->vba.PrefetchSourceLinesC[k], ++ mode_lib->vba.BytePerPixelDETC[k], ++ mode_lib->vba.VInitPreFillC[k], ++ mode_lib->vba.MaxNumSwathC[k], ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.SwathHeightC[k], ++ TWait, ++ mode_lib->vba.XFCEnabled[k], ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay, ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.DSTXAfterScaler[k], ++ mode_lib->vba.DSTYAfterScaler[k], ++ &mode_lib->vba.DestinationLinesForPrefetch[k], ++ &mode_lib->vba.PrefetchBandwidth[k], ++ &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], ++ &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], ++ &mode_lib->vba.VRatioPrefetchY[k], ++ &mode_lib->vba.VRatioPrefetchC[k], ++ &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k], ++ &mode_lib->vba.Tno_bw[k], ++ &mode_lib->vba.VUpdateOffsetPix[k], ++ &mode_lib->vba.VUpdateWidthPix[k], ++ &mode_lib->vba.VReadyOffsetPix[k]); ++ ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ mode_lib->vba.VStartup[k] = dml_min( ++ mode_lib->vba.VStartupLines, ++ 
mode_lib->vba.MaxVStartupLines[k]); ++ if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata ++ != 0) { ++ mode_lib->vba.VStartup[k] = ++ mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; ++ } ++ } else { ++ mode_lib->vba.VStartup[k] = ++ dml_min( ++ mode_lib->vba.VStartupLines, ++ mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ ++ if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) ++ mode_lib->vba.prefetch_vm_bw[k] = 0; ++ else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { ++ mode_lib->vba.prefetch_vm_bw[k] = ++ (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] ++ / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]); ++ } else { ++ mode_lib->vba.prefetch_vm_bw[k] = 0; ++ prefetch_vm_bw_valid = false; ++ } ++ if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] ++ == 0) ++ mode_lib->vba.prefetch_row_bw[k] = 0; ++ else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { ++ mode_lib->vba.prefetch_row_bw[k] = ++ (double) (mode_lib->vba.MetaRowByte[k] ++ + mode_lib->vba.PixelPTEBytesPerRow[k]) ++ / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]); ++ } else { ++ mode_lib->vba.prefetch_row_bw[k] = 0; ++ prefetch_row_bw_valid = false; ++ } ++ ++ MaxTotalRDBandwidth = ++ MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] ++ + dml_max( ++ mode_lib->vba.prefetch_vm_bw[k], ++ dml_max( ++ mode_lib->vba.prefetch_row_bw[k], ++ dml_max( ++ mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k], ++ mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]) ++ + mode_lib->vba.meta_row_bw[k] ++ + mode_lib->vba.dpte_row_bw[k])); ++ ++ if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) ++ DestinationLineTimesForPrefetchLessThan2 = true; ++ if 
(mode_lib->vba.VRatioPrefetchY[k] > 4 ++ || mode_lib->vba.VRatioPrefetchC[k] > 4) ++ VRatioPrefetchMoreThan4 = true; ++ } ++ ++ if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid ++ && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 ++ && !DestinationLineTimesForPrefetchLessThan2) ++ mode_lib->vba.PrefetchModeSupported = true; ++ else { ++ mode_lib->vba.PrefetchModeSupported = false; ++ dml_print( ++ "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); ++ } ++ ++ if (mode_lib->vba.PrefetchModeSupported == true) { ++ double final_flip_bw[DC__NUM_DPP__MAX]; ++ unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; ++ double total_dcn_read_bw_with_flip = 0; ++ ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = ++ mode_lib->vba.BandwidthAvailableForImmediateFlip ++ - mode_lib->vba.cursor_bw[k] ++ - dml_max( ++ mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k] ++ + mode_lib->vba.qual_row_bw[k], ++ mode_lib->vba.PrefetchBandwidth[k]); ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ ImmediateFlipBytes[k] = 0; ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { ++ ImmediateFlipBytes[k] = ++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k] ++ + mode_lib->vba.MetaRowByte[k] ++ + mode_lib->vba.PixelPTEBytesPerRow[k]; ++ } ++ } ++ mode_lib->vba.TotImmediateFlipBytes = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { ++ mode_lib->vba.TotImmediateFlipBytes = ++ mode_lib->vba.TotImmediateFlipBytes ++ + ImmediateFlipBytes[k]; ++ } ++ } ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ CalculateFlipSchedule( ++ mode_lib, ++ 
mode_lib->vba.UrgentExtraLatency, ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.BandwidthAvailableForImmediateFlip, ++ mode_lib->vba.TotImmediateFlipBytes, ++ mode_lib->vba.SourcePixelFormat[k], ++ ImmediateFlipBytes[k], ++ mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.Tno_bw[k], ++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k], ++ mode_lib->vba.MetaRowByte[k], ++ mode_lib->vba.PixelPTEBytesPerRow[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.dpte_row_height[k], ++ mode_lib->vba.meta_row_height[k], ++ mode_lib->vba.qual_row_bw[k], ++ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], ++ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], ++ &final_flip_bw[k], ++ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); ++ } ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ total_dcn_read_bw_with_flip = ++ total_dcn_read_bw_with_flip ++ + mode_lib->vba.cursor_bw[k] ++ + dml_max( ++ mode_lib->vba.prefetch_vm_bw[k], ++ dml_max( ++ mode_lib->vba.prefetch_row_bw[k], ++ final_flip_bw[k] ++ + dml_max( ++ mode_lib->vba.ReadBandwidthPlaneLuma[k] ++ + mode_lib->vba.ReadBandwidthPlaneChroma[k], ++ mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]))); ++ } ++ mode_lib->vba.ImmediateFlipSupported = true; ++ if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { ++ mode_lib->vba.ImmediateFlipSupported = false; ++ } ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { ++ mode_lib->vba.ImmediateFlipSupported = false; ++ } ++ } ++ } else { ++ mode_lib->vba.ImmediateFlipSupported = false; ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.ErrorResult[k]) { ++ mode_lib->vba.PrefetchModeSupported = false; ++ dml_print( ++ "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. 
Results are NOT valid\n"); ++ } ++ } ++ ++ mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; ++ } while (!((mode_lib->vba.PrefetchModeSupported ++ && (!mode_lib->vba.ImmediateFlipSupport ++ || mode_lib->vba.ImmediateFlipSupported)) ++ || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); ++ ++ //Display Pipeline Delivery Time in Prefetch ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.VRatioPrefetchY[k] <= 1) { ++ mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = ++ mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k] ++ / mode_lib->vba.HRatio[k] ++ / mode_lib->vba.PixelClock[k]; ++ } else { ++ mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = ++ mode_lib->vba.SwathWidthY[k] ++ / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] ++ / mode_lib->vba.DPPCLK[k]; ++ } ++ if (mode_lib->vba.BytePerPixelDETC[k] == 0) { ++ mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; ++ } else { ++ if (mode_lib->vba.VRatioPrefetchC[k] <= 1) { ++ mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = ++ mode_lib->vba.SwathWidthY[k] ++ * mode_lib->vba.DPPPerPlane[k] ++ / mode_lib->vba.HRatio[k] ++ / mode_lib->vba.PixelClock[k]; ++ } else { ++ mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = ++ mode_lib->vba.SwathWidthY[k] ++ / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] ++ / mode_lib->vba.DPPCLK[k]; ++ } ++ } ++ } ++ ++ // Min TTUVBlank ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { ++ mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true; ++ mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; ++ mode_lib->vba.MinTTUVBlank[k] = dml_max( ++ mode_lib->vba.DRAMClockChangeWatermark, ++ dml_max( ++ mode_lib->vba.StutterEnterPlusExitWatermark, ++ mode_lib->vba.UrgentWatermark)); ++ } else if 
(mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { ++ mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; ++ mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; ++ mode_lib->vba.MinTTUVBlank[k] = dml_max( ++ mode_lib->vba.StutterEnterPlusExitWatermark, ++ mode_lib->vba.UrgentWatermark); ++ } else { ++ mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; ++ mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false; ++ mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; ++ } ++ if (!mode_lib->vba.DynamicMetadataEnable[k]) ++ mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc ++ + mode_lib->vba.MinTTUVBlank[k]; ++ } ++ ++ // DCC Configuration ++ mode_lib->vba.ActiveDPPs = 0; ++ // NB P-State/DRAM Clock Change Support ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k]; ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ double EffectiveLBLatencyHidingY; ++ double EffectiveLBLatencyHidingC; ++ double DPPOutputBufferLinesY; ++ double DPPOutputBufferLinesC; ++ double DPPOPPBufferingY; ++ double MaxDETBufferingTimeY; ++ double ActiveDRAMClockChangeLatencyMarginY; ++ ++ mode_lib->vba.LBLatencyHidingSourceLinesY = ++ dml_min( ++ mode_lib->vba.MaxLineBufferLines, ++ (unsigned int) dml_floor( ++ (double) mode_lib->vba.LineBufferSize ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.SwathWidthY[k] ++ / dml_max( ++ mode_lib->vba.HRatio[k], ++ 1.0)), ++ 1)) - (mode_lib->vba.vtaps[k] - 1); ++ ++ mode_lib->vba.LBLatencyHidingSourceLinesC = ++ dml_min( ++ mode_lib->vba.MaxLineBufferLines, ++ (unsigned int) dml_floor( ++ (double) mode_lib->vba.LineBufferSize ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.SwathWidthY[k] ++ / 2.0 ++ / dml_max( ++ mode_lib->vba.HRatio[k] ++ / 2, ++ 1.0)), ++ 1)) ++ - (mode_lib->vba.VTAPsChroma[k] - 1); ++ ++ EffectiveLBLatencyHidingY = 
mode_lib->vba.LBLatencyHidingSourceLinesY ++ / mode_lib->vba.VRatio[k] ++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); ++ ++ EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC ++ / (mode_lib->vba.VRatio[k] / 2) ++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); ++ ++ if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) { ++ DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels ++ / mode_lib->vba.SwathWidthY[k]; ++ } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) { ++ DPPOutputBufferLinesY = 0.5; ++ } else { ++ DPPOutputBufferLinesY = 1; ++ } ++ ++ if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) { ++ DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels ++ / (mode_lib->vba.SwathWidthY[k] / 2); ++ } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) { ++ DPPOutputBufferLinesC = 0.5; ++ } else { ++ DPPOutputBufferLinesC = 1; ++ } ++ ++ DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines); ++ MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k] ++ + (mode_lib->vba.LinesInDETY[k] ++ - mode_lib->vba.LinesInDETYRoundedDownToSwath[k]) ++ / mode_lib->vba.SwathHeightY[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]); ++ ++ ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY ++ + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark; ++ ++ if (mode_lib->vba.ActiveDPPs > 1) { ++ ActiveDRAMClockChangeLatencyMarginY = ++ ActiveDRAMClockChangeLatencyMarginY ++ - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1)) ++ * mode_lib->vba.SwathHeightY[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]); ++ } ++ ++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) { ++ double DPPOPPBufferingC = (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) ++ * 
(DPPOutputBufferLinesC ++ + mode_lib->vba.OPPOutputBufferLines); ++ double MaxDETBufferingTimeC = ++ mode_lib->vba.FullDETBufferingTimeC[k] ++ + (mode_lib->vba.LinesInDETC[k] ++ - mode_lib->vba.LinesInDETCRoundedDownToSwath[k]) ++ / mode_lib->vba.SwathHeightC[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]); ++ double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC ++ + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC ++ - mode_lib->vba.DRAMClockChangeWatermark; ++ ++ if (mode_lib->vba.ActiveDPPs > 1) { ++ ActiveDRAMClockChangeLatencyMarginC = ++ ActiveDRAMClockChangeLatencyMarginC ++ - (1 ++ - 1 ++ / (mode_lib->vba.ActiveDPPs ++ - 1)) ++ * mode_lib->vba.SwathHeightC[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]); ++ } ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( ++ ActiveDRAMClockChangeLatencyMarginY, ++ ActiveDRAMClockChangeLatencyMarginC); ++ } else { ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ++ ActiveDRAMClockChangeLatencyMarginY; ++ } ++ ++ if (mode_lib->vba.WritebackEnable[k]) { ++ double WritebackDRAMClockChangeLatencyMargin; ++ ++ if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { ++ WritebackDRAMClockChangeLatencyMargin = ++ (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize ++ + mode_lib->vba.WritebackInterfaceChromaBufferSize) ++ / (mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) ++ * 4) ++ - mode_lib->vba.WritebackDRAMClockChangeWatermark; ++ } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { ++ WritebackDRAMClockChangeLatencyMargin = ++ dml_min( ++ (double) mode_lib->vba.WritebackInterfaceLumaBufferSize ++ * 8.0 / 10, ++ 2.0 ++ * mode_lib->vba.WritebackInterfaceChromaBufferSize ++ * 8 / 10) ++ / (mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / 
(mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k])) ++ - mode_lib->vba.WritebackDRAMClockChangeWatermark; ++ } else { ++ WritebackDRAMClockChangeLatencyMargin = ++ dml_min( ++ (double) mode_lib->vba.WritebackInterfaceLumaBufferSize, ++ 2.0 ++ * mode_lib->vba.WritebackInterfaceChromaBufferSize) ++ / (mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k])) ++ - mode_lib->vba.WritebackDRAMClockChangeWatermark; ++ } ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], ++ WritebackDRAMClockChangeLatencyMargin); ++ } ++ } ++ ++ mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] ++ < mode_lib->vba.MinActiveDRAMClockChangeMargin) { ++ mode_lib->vba.MinActiveDRAMClockChangeMargin = ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; ++ } ++ } ++ ++ mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = ++ mode_lib->vba.MinActiveDRAMClockChangeMargin ++ + mode_lib->vba.DRAMClockChangeLatency; ++ ++ if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { ++ mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; ++ } else { ++ if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { ++ mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { ++ mode_lib->vba.DRAMClockChangeSupport[0][0] = ++ dm_dram_clock_change_unsupported; ++ } ++ } ++ } else { ++ mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported; ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.soc.num_states; k++) ++ for (j = 0; 
j < 2; j++) ++ mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0]; ++ ++ //XFC Parameters: ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.XFCEnabled[k] == true) { ++ double TWait; ++ ++ mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; ++ mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; ++ mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; ++ TWait = CalculateTWait( ++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.SREnterPlusExitTime); ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.SwathWidthY[k], ++ dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.XFCTSlvVupdateOffset, ++ mode_lib->vba.XFCTSlvVupdateWidth, ++ mode_lib->vba.XFCTSlvVreadyOffset, ++ mode_lib->vba.XFCXBUFLatencyTolerance, ++ mode_lib->vba.XFCFillBWOverhead, ++ mode_lib->vba.XFCSlvChunkSize, ++ mode_lib->vba.XFCBusTransportTime, ++ mode_lib->vba.TCalc, ++ TWait, ++ &mode_lib->vba.SrcActiveDrainRate, ++ &mode_lib->vba.TInitXFill, ++ &mode_lib->vba.TslvChk); ++ mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = ++ dml_floor( ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1); ++ mode_lib->vba.XFCTransferDelay[k] = ++ dml_ceil( ++ mode_lib->vba.XFCBusTransportTime ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1); ++ mode_lib->vba.XFCPrechargeDelay[k] = ++ dml_ceil( ++ (mode_lib->vba.XFCBusTransportTime ++ + mode_lib->vba.TInitXFill ++ + mode_lib->vba.TslvChk) ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1); ++ mode_lib->vba.InitFillLevel = 
mode_lib->vba.XFCXBUFLatencyTolerance ++ * mode_lib->vba.SrcActiveDrainRate; ++ mode_lib->vba.FinalFillMargin = ++ (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] ++ + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k] ++ * mode_lib->vba.SrcActiveDrainRate ++ + mode_lib->vba.XFCFillConstant; ++ mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay ++ * mode_lib->vba.SrcActiveDrainRate ++ + mode_lib->vba.FinalFillMargin; ++ mode_lib->vba.RemainingFillLevel = dml_max( ++ 0.0, ++ mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); ++ mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel ++ / (mode_lib->vba.SrcActiveDrainRate ++ * mode_lib->vba.XFCFillBWOverhead / 100); ++ mode_lib->vba.XFCPrefetchMargin[k] = ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay ++ + mode_lib->vba.TFinalxFill ++ + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] ++ + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ } else { ++ mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0; ++ mode_lib->vba.XFCSlaveVupdateWidth[k] = 0; ++ mode_lib->vba.XFCSlaveVReadyOffset[k] = 0; ++ mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0; ++ mode_lib->vba.XFCPrechargeDelay[k] = 0; ++ mode_lib->vba.XFCTransferDelay[k] = 0; ++ mode_lib->vba.XFCPrefetchMargin[k] = 0; ++ } ++ } ++ { ++ unsigned int VStartupMargin = 0; ++ bool FirstMainPlane = true; ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ unsigned int Margin = (mode_lib->vba.MaxVStartupLines[k] - mode_lib->vba.VStartup[k]) ++ * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]; ++ ++ if (FirstMainPlane) { ++ VStartupMargin = Margin; ++ FirstMainPlane = false; ++ } else ++ VStartupMargin = dml_min(VStartupMargin, Margin); ++ } ++ ++ if (mode_lib->vba.UseMaximumVStartup) { ++ if (mode_lib->vba.VTotal_Max[k] == 
mode_lib->vba.VTotal[k]) { ++ //only use max vstart if it is not drr or lateflip. ++ mode_lib->vba.VStartup[k] = mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]; ++ } ++ } ++ } ++} ++} ++ ++static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) ++{ ++ double BytePerPixDETY; ++ double BytePerPixDETC; ++ double Read256BytesBlockHeightY; ++ double Read256BytesBlockHeightC; ++ double Read256BytesBlockWidthY; ++ double Read256BytesBlockWidthC; ++ double MaximumSwathHeightY; ++ double MaximumSwathHeightC; ++ double MinimumSwathHeightY; ++ double MinimumSwathHeightC; ++ double SwathWidth; ++ double SwathWidthGranularityY; ++ double SwathWidthGranularityC; ++ double RoundedUpMaxSwathSizeBytesY; ++ double RoundedUpMaxSwathSizeBytesC; ++ unsigned int j, k; ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ bool MainPlaneDoesODMCombine = false; ++ ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { ++ BytePerPixDETY = 8; ++ BytePerPixDETC = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { ++ BytePerPixDETY = 4; ++ BytePerPixDETC = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { ++ BytePerPixDETY = 2; ++ BytePerPixDETC = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { ++ BytePerPixDETY = 1; ++ BytePerPixDETC = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { ++ BytePerPixDETY = 1; ++ BytePerPixDETC = 2; ++ } else { ++ BytePerPixDETY = 4.0 / 3.0; ++ BytePerPixDETC = 8.0 / 3.0; ++ } ++ ++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ Read256BytesBlockHeightY = 1; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { ++ Read256BytesBlockHeightY = 4; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == 
dm_444_32 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { ++ Read256BytesBlockHeightY = 8; ++ } else { ++ Read256BytesBlockHeightY = 16; ++ } ++ Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) ++ / Read256BytesBlockHeightY; ++ Read256BytesBlockHeightC = 0; ++ Read256BytesBlockWidthC = 0; ++ } else { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ Read256BytesBlockHeightY = 1; ++ Read256BytesBlockHeightC = 1; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { ++ Read256BytesBlockHeightY = 16; ++ Read256BytesBlockHeightC = 8; ++ } else { ++ Read256BytesBlockHeightY = 8; ++ Read256BytesBlockHeightC = 8; ++ } ++ Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) ++ / Read256BytesBlockHeightY; ++ Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) ++ / Read256BytesBlockHeightC; ++ } ++ ++ if (mode_lib->vba.SourceScan[k] == dm_horz) { ++ MaximumSwathHeightY = Read256BytesBlockHeightY; ++ MaximumSwathHeightC = Read256BytesBlockHeightC; ++ } else { ++ MaximumSwathHeightY = Read256BytesBlockWidthY; ++ MaximumSwathHeightC = Read256BytesBlockWidthC; ++ } ++ ++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear ++ || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ && (mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_4kb_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_4kb_s_x ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s_t ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s_x ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_var_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_var_s_x) ++ && mode_lib->vba.SourceScan[k] == dm_horz)) { ++ MinimumSwathHeightY = MaximumSwathHeightY; ++ } else if (mode_lib->vba.SourcePixelFormat[k] 
== dm_444_8 ++ && mode_lib->vba.SourceScan[k] != dm_horz) { ++ MinimumSwathHeightY = MaximumSwathHeightY; ++ } else { ++ MinimumSwathHeightY = MaximumSwathHeightY / 2.0; ++ } ++ MinimumSwathHeightC = MaximumSwathHeightC; ++ } else { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ MinimumSwathHeightY = MaximumSwathHeightY; ++ MinimumSwathHeightC = MaximumSwathHeightC; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 ++ && mode_lib->vba.SourceScan[k] == dm_horz) { ++ MinimumSwathHeightY = MaximumSwathHeightY / 2.0; ++ MinimumSwathHeightC = MaximumSwathHeightC; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 ++ && mode_lib->vba.SourceScan[k] == dm_horz) { ++ MinimumSwathHeightC = MaximumSwathHeightC / 2.0; ++ MinimumSwathHeightY = MaximumSwathHeightY; ++ } else { ++ MinimumSwathHeightY = MaximumSwathHeightY; ++ MinimumSwathHeightC = MaximumSwathHeightC; ++ } ++ } ++ ++ if (mode_lib->vba.SourceScan[k] == dm_horz) { ++ SwathWidth = mode_lib->vba.ViewportWidth[k]; ++ } else { ++ SwathWidth = mode_lib->vba.ViewportHeight[k]; ++ } ++ ++ if (mode_lib->vba.ODMCombineEnabled[k] == true) { ++ MainPlaneDoesODMCombine = true; ++ } ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { ++ if (mode_lib->vba.BlendingAndTiming[k] == j ++ && mode_lib->vba.ODMCombineEnabled[j] == true) { ++ MainPlaneDoesODMCombine = true; ++ } ++ } ++ ++ if (MainPlaneDoesODMCombine == true) { ++ SwathWidth = dml_min( ++ SwathWidth, ++ mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); ++ } else { ++ if (mode_lib->vba.DPPPerPlane[k] == 0) ++ SwathWidth = 0; ++ else ++ SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; ++ } ++ ++ SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; ++ RoundedUpMaxSwathSizeBytesY = (dml_ceil( ++ (double) (SwathWidth - 1), ++ SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY ++ * MaximumSwathHeightY; ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { ++ 
RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) ++ + 256; ++ } ++ if (MaximumSwathHeightC > 0) { ++ SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) ++ / MaximumSwathHeightC; ++ RoundedUpMaxSwathSizeBytesC = (dml_ceil( ++ (double) (SwathWidth / 2.0 - 1), ++ SwathWidthGranularityC) + SwathWidthGranularityC) ++ * BytePerPixDETC * MaximumSwathHeightC; ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { ++ RoundedUpMaxSwathSizeBytesC = dml_ceil( ++ RoundedUpMaxSwathSizeBytesC, ++ 256) + 256; ++ } ++ } else ++ RoundedUpMaxSwathSizeBytesC = 0.0; ++ ++ if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC ++ <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { ++ mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; ++ mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; ++ } else { ++ mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; ++ mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; ++ } ++ ++ if (mode_lib->vba.SwathHeightC[k] == 0) { ++ mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte * 1024; ++ mode_lib->vba.DETBufferSizeC[k] = 0; ++ } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) { ++ mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte ++ * 1024.0 / 2; ++ mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte ++ * 1024.0 / 2; ++ } else { ++ mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte ++ * 1024.0 * 2 / 3; ++ mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte ++ * 1024.0 / 3; ++ } ++ } ++} ++ ++static double CalculateTWait( ++ unsigned int PrefetchMode, ++ double DRAMClockChangeLatency, ++ double UrgentLatencyPixelDataOnly, ++ double SREnterPlusExitTime) ++{ ++ if (PrefetchMode == 0) { ++ return dml_max( ++ DRAMClockChangeLatency + UrgentLatencyPixelDataOnly, ++ dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly)); ++ } else if (PrefetchMode == 1) { ++ return 
/*
 * Compute the XFC remote surface flip delay.
 *
 * Three intermediate values are handed back through the out-parameters:
 *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
 *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
 *   *TslvChk            = XFCSlvChunkSize / (drain rate scaled by the same
 *                         fill overhead factor)
 *
 * Returns 2 * XFCBusTransportTime + slave setup time + TCalc + TWait
 * + *TslvChk + *TInitXFill, where the slave setup time is the sum of the
 * three XFCTSlv* arguments.
 */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk)
{
	/* XFCFillBWOverhead is a percentage; turn it into a scale factor. */
	const double fill_overhead = 1 + XFCFillBWOverhead / 100;
	const double slave_setup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth
			+ XFCTSlvVreadyOffset;
	double fill_rate;
	double delay;

	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
	*TInitXFill = XFCXBUFLatencyTolerance / fill_overhead;
	fill_rate = *SrcActiveDrainRate * fill_overhead;
	*TslvChk = XFCSlvChunkSize / fill_rate;

	dml_print(
			"DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n",
			*SrcActiveDrainRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", slave_setup);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", fill_rate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);

	/* TODO: This doesn't seem to match programming guide */
	delay = 2 * XFCBusTransportTime + slave_setup + TCalc + TWait + *TslvChk
			+ *TInitXFill;
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", delay);

	return delay;
}
WritebackDestinationWidth) ++{ ++ double CalculateWriteBackDelay = ++ dml_max( ++ dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, ++ WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) ++ * dml_ceil( ++ WritebackDestinationWidth ++ / 4.0, ++ 1) ++ + dml_ceil(1.0 / WritebackVRatio, 1) ++ * (dml_ceil( ++ WritebackLumaVTaps ++ / 4.0, ++ 1) + 4)); ++ ++ if (WritebackPixelFormat != dm_444_32) { ++ CalculateWriteBackDelay = ++ dml_max( ++ CalculateWriteBackDelay, ++ dml_max( ++ dml_ceil( ++ WritebackChromaHTaps ++ / 2.0, ++ 1) ++ / (2 ++ * WritebackHRatio), ++ WritebackChromaVTaps ++ * dml_ceil( ++ 1 ++ / (2 ++ * WritebackVRatio), ++ 1) ++ * dml_ceil( ++ WritebackDestinationWidth ++ / 2.0 ++ / 2.0, ++ 1) ++ + dml_ceil( ++ 1 ++ / (2 ++ * WritebackVRatio), ++ 1) ++ * (dml_ceil( ++ WritebackChromaVTaps ++ / 4.0, ++ 1) ++ + 4))); ++ } ++ return CalculateWriteBackDelay; ++} ++ ++static void CalculateActiveRowBandwidth( ++ bool GPUVMEnable, ++ enum source_format_class SourcePixelFormat, ++ double VRatio, ++ bool DCCEnable, ++ double LineTime, ++ unsigned int MetaRowByteLuma, ++ unsigned int MetaRowByteChroma, ++ unsigned int meta_row_height_luma, ++ unsigned int meta_row_height_chroma, ++ unsigned int PixelPTEBytesPerRowLuma, ++ unsigned int PixelPTEBytesPerRowChroma, ++ unsigned int dpte_row_height_luma, ++ unsigned int dpte_row_height_chroma, ++ double *meta_row_bw, ++ double *dpte_row_bw, ++ double *qual_row_bw) ++{ ++ if (DCCEnable != true) { ++ *meta_row_bw = 0; ++ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { ++ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) ++ + VRatio / 2 * MetaRowByteChroma ++ / (meta_row_height_chroma * LineTime); ++ } else { ++ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); ++ } ++ ++ if (GPUVMEnable != true) { ++ *dpte_row_bw = 0; ++ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { ++ *dpte_row_bw = VRatio * 
PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) ++ + VRatio / 2 * PixelPTEBytesPerRowChroma ++ / (dpte_row_height_chroma * LineTime); ++ } else { ++ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); ++ } ++ ++ if ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)) { ++ *qual_row_bw = *meta_row_bw + *dpte_row_bw; ++ } else { ++ *qual_row_bw = 0; ++ } ++} ++ ++static void CalculateFlipSchedule( ++ struct display_mode_lib *mode_lib, ++ double UrgentExtraLatency, ++ double UrgentLatencyPixelDataOnly, ++ unsigned int GPUVMMaxPageTableLevels, ++ bool GPUVMEnable, ++ double BandwidthAvailableForImmediateFlip, ++ unsigned int TotImmediateFlipBytes, ++ enum source_format_class SourcePixelFormat, ++ unsigned int ImmediateFlipBytes, ++ double LineTime, ++ double VRatio, ++ double Tno_bw, ++ double PDEAndMetaPTEBytesFrame, ++ unsigned int MetaRowByte, ++ unsigned int PixelPTEBytesPerRow, ++ bool DCCEnable, ++ unsigned int dpte_row_height, ++ unsigned int meta_row_height, ++ double qual_row_bw, ++ double *DestinationLinesToRequestVMInImmediateFlip, ++ double *DestinationLinesToRequestRowInImmediateFlip, ++ double *final_flip_bw, ++ bool *ImmediateFlipSupportedForPipe) ++{ ++ double min_row_time = 0.0; ++ ++ if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { ++ *DestinationLinesToRequestVMInImmediateFlip = 0.0; ++ *DestinationLinesToRequestRowInImmediateFlip = 0.0; ++ *final_flip_bw = qual_row_bw; ++ *ImmediateFlipSupportedForPipe = true; ++ } else { ++ double TimeForFetchingMetaPTEImmediateFlip; ++ double TimeForFetchingRowInVBlankImmediateFlip; ++ ++ if (GPUVMEnable == true) { ++ mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip ++ * ImmediateFlipBytes / TotImmediateFlipBytes; ++ TimeForFetchingMetaPTEImmediateFlip = ++ dml_max( ++ Tno_bw ++ + PDEAndMetaPTEBytesFrame ++ / mode_lib->vba.ImmediateFlipBW[0], ++ dml_max( ++ UrgentExtraLatency ++ + UrgentLatencyPixelDataOnly ++ * 
(GPUVMMaxPageTableLevels ++ - 1), ++ LineTime / 4.0)); ++ } else { ++ TimeForFetchingMetaPTEImmediateFlip = 0; ++ } ++ ++ *DestinationLinesToRequestVMInImmediateFlip = dml_floor( ++ 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime + 0.125), ++ 1) / 4.0; ++ ++ if ((GPUVMEnable == true || DCCEnable == true)) { ++ mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip ++ * ImmediateFlipBytes / TotImmediateFlipBytes; ++ TimeForFetchingRowInVBlankImmediateFlip = dml_max( ++ (MetaRowByte + PixelPTEBytesPerRow) ++ / mode_lib->vba.ImmediateFlipBW[0], ++ dml_max(UrgentLatencyPixelDataOnly, LineTime / 4.0)); ++ } else { ++ TimeForFetchingRowInVBlankImmediateFlip = 0; ++ } ++ ++ *DestinationLinesToRequestRowInImmediateFlip = dml_floor( ++ 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime + 0.125), ++ 1) / 4.0; ++ ++ if (GPUVMEnable == true) { ++ *final_flip_bw = ++ dml_max( ++ PDEAndMetaPTEBytesFrame ++ / (*DestinationLinesToRequestVMInImmediateFlip ++ * LineTime), ++ (MetaRowByte + PixelPTEBytesPerRow) ++ / (TimeForFetchingRowInVBlankImmediateFlip ++ * LineTime)); ++ } else if (MetaRowByte + PixelPTEBytesPerRow > 0) { ++ *final_flip_bw = (MetaRowByte + PixelPTEBytesPerRow) ++ / (TimeForFetchingRowInVBlankImmediateFlip * LineTime); ++ } else { ++ *final_flip_bw = 0; ++ } ++ ++ if (GPUVMEnable && !DCCEnable) ++ min_row_time = dpte_row_height * LineTime / VRatio; ++ else if (!GPUVMEnable && DCCEnable) ++ min_row_time = meta_row_height * LineTime / VRatio; ++ else ++ min_row_time = dml_min(dpte_row_height, meta_row_height) * LineTime ++ / VRatio; ++ ++ if (*DestinationLinesToRequestVMInImmediateFlip >= 8 ++ || *DestinationLinesToRequestRowInImmediateFlip >= 16 ++ || TimeForFetchingMetaPTEImmediateFlip ++ + 2 * TimeForFetchingRowInVBlankImmediateFlip ++ > min_row_time) ++ *ImmediateFlipSupportedForPipe = false; ++ else ++ *ImmediateFlipSupportedForPipe = true; ++ } ++} ++ ++static unsigned int TruncToValidBPP( ++ double DecimalBPP, ++ bool DSCEnabled, 
++ enum output_encoder_class Output, ++ enum output_format_class Format, ++ unsigned int DSCInputBitPerComponent) ++{ ++ if (Output == dm_hdmi) { ++ if (Format == dm_420) { ++ if (DecimalBPP >= 18) ++ return 18; ++ else if (DecimalBPP >= 15) ++ return 15; ++ else if (DecimalBPP >= 12) ++ return 12; ++ else ++ return BPP_INVALID; ++ } else if (Format == dm_444) { ++ if (DecimalBPP >= 36) ++ return 36; ++ else if (DecimalBPP >= 30) ++ return 30; ++ else if (DecimalBPP >= 24) ++ return 24; ++ else if (DecimalBPP >= 18) ++ return 18; ++ else ++ return BPP_INVALID; ++ } else { ++ if (DecimalBPP / 1.5 >= 24) ++ return 24; ++ else if (DecimalBPP / 1.5 >= 20) ++ return 20; ++ else if (DecimalBPP / 1.5 >= 16) ++ return 16; ++ else ++ return BPP_INVALID; ++ } ++ } else { ++ if (DSCEnabled) { ++ if (Format == dm_420) { ++ if (DecimalBPP < 6) ++ return BPP_INVALID; ++ else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16) ++ return 1.5 * DSCInputBitPerComponent - 1 / 16; ++ else ++ return dml_floor(16 * DecimalBPP, 1) / 16; ++ } else if (Format == dm_n422) { ++ if (DecimalBPP < 7) ++ return BPP_INVALID; ++ else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16) ++ return 2 * DSCInputBitPerComponent - 1 / 16; ++ else ++ return dml_floor(16 * DecimalBPP, 1) / 16; ++ } else { ++ if (DecimalBPP < 8) ++ return BPP_INVALID; ++ else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16) ++ return 3 * DSCInputBitPerComponent - 1 / 16; ++ else ++ return dml_floor(16 * DecimalBPP, 1) / 16; ++ } ++ } else if (Format == dm_420) { ++ if (DecimalBPP >= 18) ++ return 18; ++ else if (DecimalBPP >= 15) ++ return 15; ++ else if (DecimalBPP >= 12) ++ return 12; ++ else ++ return BPP_INVALID; ++ } else if (Format == dm_s422 || Format == dm_n422) { ++ if (DecimalBPP >= 24) ++ return 24; ++ else if (DecimalBPP >= 20) ++ return 20; ++ else if (DecimalBPP >= 16) ++ return 16; ++ else ++ return BPP_INVALID; ++ } else { ++ if (DecimalBPP >= 36) ++ return 36; ++ else if (DecimalBPP >= 30) 
++ return 30; ++ else if (DecimalBPP >= 24) ++ return 24; ++ else if (DecimalBPP >= 18) ++ return 18; ++ else ++ return BPP_INVALID; ++ } ++ } ++} ++ ++void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ++{ ++ struct vba_vars_st *locals = &mode_lib->vba; ++ ++ int i; ++ unsigned int j, k, m; ++ ++ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ ++ ++ /*Scale Ratio, taps Support Check*/ ++ ++ mode_lib->vba.ScaleRatioAndTapsSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.ScalerEnabled[k] == false ++ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) ++ || mode_lib->vba.HRatio[k] != 1.0 ++ || mode_lib->vba.htaps[k] != 1.0 ++ || mode_lib->vba.VRatio[k] != 1.0 ++ || mode_lib->vba.vtaps[k] != 1.0)) { ++ mode_lib->vba.ScaleRatioAndTapsSupport = false; ++ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 ++ || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 ++ || (mode_lib->vba.htaps[k] > 1.0 ++ && (mode_lib->vba.htaps[k] % 2) == 1) ++ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio ++ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio ++ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] ++ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] ++ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 ++ && (mode_lib->vba.HRatio[k] / 2.0 ++ > mode_lib->vba.HTAPsChroma[k] ++ || mode_lib->vba.VRatio[k] / 2.0 ++ > mode_lib->vba.VTAPsChroma[k]))) { ++ mode_lib->vba.ScaleRatioAndTapsSupport = false; ++ } ++ } 
++ /*Source Format, Pixel Format and Scan Support Check*/ ++ ++ mode_lib->vba.SourceFormatPixelAndScanSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear ++ && mode_lib->vba.SourceScan[k] != dm_horz) ++ || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) ++ || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x ++ && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_8 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_10)) ++ || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_gfx7_2d_thin_lvp) ++ && !((mode_lib->vba.SourcePixelFormat[k] ++ == dm_444_64 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_444_32) ++ && mode_lib->vba.SourceScan[k] ++ == dm_horz ++ && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp ++ == true ++ && mode_lib->vba.DCCEnable[k] ++ == false)) ++ || (mode_lib->vba.DCCEnable[k] == true ++ && (mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_linear ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_8 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_10)))) { ++ mode_lib->vba.SourceFormatPixelAndScanSupport = false; ++ } ++ } ++ /*Bandwidth Support Check*/ ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { ++ locals->BytePerPixelInDETY[k] = 8.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { ++ locals->BytePerPixelInDETY[k] = 4.0; ++ 
locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { ++ locals->BytePerPixelInDETY[k] = 2.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { ++ locals->BytePerPixelInDETY[k] = 1.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { ++ locals->BytePerPixelInDETY[k] = 1.0; ++ locals->BytePerPixelInDETC[k] = 2.0; ++ } else { ++ locals->BytePerPixelInDETY[k] = 4.0 / 3; ++ locals->BytePerPixelInDETC[k] = 8.0 / 3; ++ } ++ if (mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; ++ } else { ++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; ++ locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; ++ locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true ++ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { ++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) * 4.0; ++ } else if (mode_lib->vba.WritebackEnable[k] == true ++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { ++ locals->WriteBandwidth[k] = 
mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) * 3.0; ++ } else if (mode_lib->vba.WritebackEnable[k] == true) { ++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) * 1.5; ++ } else { ++ locals->WriteBandwidth[k] = 0.0; ++ } ++ } ++ mode_lib->vba.DCCEnabledInAnyPlane = false; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.DCCEnable[k] == true) { ++ mode_lib->vba.DCCEnabledInAnyPlane = true; ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->FabricAndDRAMBandwidthPerState[i] = dml_min( ++ mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels ++ * mode_lib->vba.DRAMChannelWidth, ++ mode_lib->vba.FabricClockPerState[i] ++ * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000; ++ locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * locals->DCFCLKPerState[i], ++ locals->FabricAndDRAMBandwidthPerState[i] * 1000) ++ * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; ++ ++ locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; ++ ++ if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { ++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], ++ locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ++ ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 ++ / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] ++ * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); ++ } ++ locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * ++ locals->UrgentLatency / 
(locals->ReturnBWToDCNPerState * locals->UrgentLatency ++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); ++ ++ if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { ++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], ++ 4 * locals->ReturnBWToDCNPerState * ++ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 ++ * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / ++ dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency ++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); ++ } ++ ++ locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * ++ locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); ++ ++ if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { ++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], ++ locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ++ ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 ++ / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] ++ * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); ++ } ++ locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * ++ locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency ++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); ++ ++ if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { ++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], ++ 4 * locals->ReturnBWToDCNPerState * ++ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 ++ * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / ++ dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency ++ + (locals->ROBBufferSizeInKByte - 
locals->PixelChunkSizeInKByte) * 1024), 2)); ++ } ++ } ++ /*Writeback Latency support check*/ ++ ++ mode_lib->vba.WritebackLatencySupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { ++ if (locals->WriteBandwidth[k] ++ > (mode_lib->vba.WritebackInterfaceLumaBufferSize ++ + mode_lib->vba.WritebackInterfaceChromaBufferSize) ++ / mode_lib->vba.WritebackLatency) { ++ mode_lib->vba.WritebackLatencySupport = false; ++ } ++ } else { ++ if (locals->WriteBandwidth[k] ++ > 1.5 ++ * dml_min( ++ mode_lib->vba.WritebackInterfaceLumaBufferSize, ++ 2.0 ++ * mode_lib->vba.WritebackInterfaceChromaBufferSize) ++ / mode_lib->vba.WritebackLatency) { ++ mode_lib->vba.WritebackLatencySupport = false; ++ } ++ } ++ } ++ } ++ /*Re-ordering Buffer Support Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = ++ (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] ++ + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; ++ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] ++ > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { ++ locals->ROBSupport[i] = true; ++ } else { ++ locals->ROBSupport[i] = false; ++ } ++ } ++ /*Writeback Mode Support Check*/ ++ ++ mode_lib->vba.TotalNumberOfActiveWriteback = 0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) ++ mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; ++ mode_lib->vba.TotalNumberOfActiveWriteback = ++ mode_lib->vba.TotalNumberOfActiveWriteback ++ + mode_lib->vba.ActiveWritebacksPerPlane[k]; ++ } ++ } ++ mode_lib->vba.WritebackModeSupport = true; ++ if 
(mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { ++ mode_lib->vba.WritebackModeSupport = false; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true ++ && mode_lib->vba.Writeback10bpc420Supported != true ++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { ++ mode_lib->vba.WritebackModeSupport = false; ++ } ++ } ++ /*Writeback Scale Ratio and Taps Support Check*/ ++ ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false ++ && (mode_lib->vba.WritebackHRatio[k] != 1.0 ++ || mode_lib->vba.WritebackVRatio[k] != 1.0)) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio ++ || mode_lib->vba.WritebackVRatio[k] ++ > mode_lib->vba.WritebackMaxVSCLRatio ++ || mode_lib->vba.WritebackHRatio[k] ++ < mode_lib->vba.WritebackMinHSCLRatio ++ || mode_lib->vba.WritebackVRatio[k] ++ < mode_lib->vba.WritebackMinVSCLRatio ++ || mode_lib->vba.WritebackLumaHTaps[k] ++ > mode_lib->vba.WritebackMaxHSCLTaps ++ || mode_lib->vba.WritebackLumaVTaps[k] ++ > mode_lib->vba.WritebackMaxVSCLTaps ++ || mode_lib->vba.WritebackHRatio[k] ++ > mode_lib->vba.WritebackLumaHTaps[k] ++ || mode_lib->vba.WritebackVRatio[k] ++ > mode_lib->vba.WritebackLumaVTaps[k] ++ || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 ++ && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) ++ == 1)) ++ || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 ++ && (mode_lib->vba.WritebackChromaHTaps[k] ++ > mode_lib->vba.WritebackMaxHSCLTaps ++ || mode_lib->vba.WritebackChromaVTaps[k] ++ > mode_lib->vba.WritebackMaxVSCLTaps ++ || 2.0 ++ * mode_lib->vba.WritebackHRatio[k] ++ > mode_lib->vba.WritebackChromaHTaps[k] ++ || 2.0 ++ * mode_lib->vba.WritebackVRatio[k] ++ 
> mode_lib->vba.WritebackChromaVTaps[k] ++ || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 ++ && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ if (mode_lib->vba.WritebackVRatio[k] < 1.0) { ++ mode_lib->vba.WritebackLumaVExtra = ++ dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); ++ } else { ++ mode_lib->vba.WritebackLumaVExtra = -1; ++ } ++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 ++ && mode_lib->vba.WritebackLumaVTaps[k] ++ > (mode_lib->vba.WritebackLineBufferLumaBufferSize ++ + mode_lib->vba.WritebackLineBufferChromaBufferSize) ++ / 3.0 ++ / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackLumaVExtra) ++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 ++ && mode_lib->vba.WritebackLumaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferLumaBufferSize ++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackLumaVExtra) ++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 ++ && mode_lib->vba.WritebackLumaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferLumaBufferSize ++ * 8.0 / 10.0 ++ / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackLumaVExtra)) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { ++ mode_lib->vba.WritebackChromaVExtra = 0.0; ++ } else { ++ mode_lib->vba.WritebackChromaVExtra = -1; ++ } ++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 ++ && mode_lib->vba.WritebackChromaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferChromaBufferSize ++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackChromaVExtra) ++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 ++ && mode_lib->vba.WritebackChromaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferChromaBufferSize ++ * 8.0 / 10.0 ++ / mode_lib->vba.WritebackDestinationWidth[k] ++ - 
mode_lib->vba.WritebackChromaVExtra)) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ } ++ } ++ /*Maximum DISPCLK/DPPCLK Support check*/ ++ ++ mode_lib->vba.WritebackRequiredDISPCLK = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ mode_lib->vba.WritebackRequiredDISPCLK = ++ dml_max( ++ mode_lib->vba.WritebackRequiredDISPCLK, ++ CalculateWriteBackDISPCLK( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k], ++ mode_lib->vba.HTotal[k], ++ mode_lib->vba.WritebackChromaLineBufferWidth)); ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.HRatio[k] > 1.0) { ++ locals->PSCL_FACTOR[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / dml_ceil( ++ mode_lib->vba.htaps[k] ++ / 6.0, ++ 1.0)); ++ } else { ++ locals->PSCL_FACTOR[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ if (locals->BytePerPixelInDETC[k] == 0.0) { ++ locals->PSCL_FACTOR_CHROMA[k] = 0.0; ++ locals->MinDPPCLKUsingSingleDPP[k] = ++ mode_lib->vba.PixelClock[k] ++ * dml_max3( ++ mode_lib->vba.vtaps[k] / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k]), ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / locals->PSCL_FACTOR[k], ++ 1.0); ++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) ++ && locals->MinDPPCLKUsingSingleDPP[k] ++ < 2.0 * mode_lib->vba.PixelClock[k]) { ++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0 ++ * mode_lib->vba.PixelClock[k]; ++ } ++ } else { ++ if (mode_lib->vba.HRatio[k] / 2.0 > 
1.0) { ++ locals->PSCL_FACTOR_CHROMA[k] = ++ dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / 2.0 ++ / dml_ceil( ++ mode_lib->vba.HTAPsChroma[k] ++ / 6.0, ++ 1.0)); ++ } else { ++ locals->PSCL_FACTOR_CHROMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ locals->MinDPPCLKUsingSingleDPP[k] = ++ mode_lib->vba.PixelClock[k] ++ * dml_max5( ++ mode_lib->vba.vtaps[k] / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k]), ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / locals->PSCL_FACTOR[k], ++ mode_lib->vba.VTAPsChroma[k] ++ / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k] ++ / 2.0), ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / 4.0 ++ / locals->PSCL_FACTOR_CHROMA[k], ++ 1.0); ++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 ++ || mode_lib->vba.HTAPsChroma[k] > 6.0 ++ || mode_lib->vba.VTAPsChroma[k] > 6.0) ++ && locals->MinDPPCLKUsingSingleDPP[k] ++ < 2.0 * mode_lib->vba.PixelClock[k]) { ++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0 ++ * mode_lib->vba.PixelClock[k]; ++ } ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ Calculate256BBlockSizes( ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0), ++ dml_ceil(locals->BytePerPixelInDETC[k], 2.0), ++ &locals->Read256BlockHeightY[k], ++ &locals->Read256BlockHeightC[k], ++ &locals->Read256BlockWidthY[k], ++ &locals->Read256BlockWidthC[k]); ++ if (mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; ++ locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; ++ } else { ++ locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; ++ locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; ++ } ++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ || 
mode_lib->vba.SourcePixelFormat[k] == dm_444_32 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear ++ || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ && (mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_4kb_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_4kb_s_x ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s_t ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s_x ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_var_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_var_s_x) ++ && mode_lib->vba.SourceScan[k] == dm_horz)) { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ } else { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] ++ / 2.0; ++ } ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } else { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 ++ && mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] ++ / 2.0; ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 ++ && mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] ++ / 2.0; ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ } else { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } ++ } ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ mode_lib->vba.MaximumSwathWidthSupport = 8192.0; ++ } else { ++ mode_lib->vba.MaximumSwathWidthSupport = 5120.0; ++ } ++ 
mode_lib->vba.MaximumSwathWidthInDETBuffer = ++ dml_min( ++ mode_lib->vba.MaximumSwathWidthSupport, ++ mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 ++ / (locals->BytePerPixelInDETY[k] ++ * locals->MinSwathHeightY[k] ++ + locals->BytePerPixelInDETC[k] ++ / 2.0 ++ * locals->MinSwathHeightC[k])); ++ if (locals->BytePerPixelInDETC[k] == 0.0) { ++ mode_lib->vba.MaximumSwathWidthInLineBuffer = ++ mode_lib->vba.LineBufferSize ++ * dml_max(mode_lib->vba.HRatio[k], 1.0) ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.vtaps[k] ++ + dml_max( ++ dml_ceil( ++ mode_lib->vba.VRatio[k], ++ 1.0) ++ - 2, ++ 0.0)); ++ } else { ++ mode_lib->vba.MaximumSwathWidthInLineBuffer = ++ dml_min( ++ mode_lib->vba.LineBufferSize ++ * dml_max( ++ mode_lib->vba.HRatio[k], ++ 1.0) ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.vtaps[k] ++ + dml_max( ++ dml_ceil( ++ mode_lib->vba.VRatio[k], ++ 1.0) ++ - 2, ++ 0.0)), ++ 2.0 * mode_lib->vba.LineBufferSize ++ * dml_max( ++ mode_lib->vba.HRatio[k] ++ / 2.0, ++ 1.0) ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.VTAPsChroma[k] ++ + dml_max( ++ dml_ceil( ++ mode_lib->vba.VRatio[k] ++ / 2.0, ++ 1.0) ++ - 2, ++ 0.0))); ++ } ++ locals->MaximumSwathWidth[k] = dml_min( ++ mode_lib->vba.MaximumSwathWidthInDETBuffer, ++ mode_lib->vba.MaximumSwathWidthInLineBuffer); ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( ++ mode_lib->vba.MaxDispclk[i], ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( ++ mode_lib->vba.MaxDppclk[i], ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ locals->RequiredDISPCLK[i][j] = 0.0; ++ locals->DISPCLK_DPPCLK_Support[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = ++ mode_lib->vba.PixelClock[k] * (1.0 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); ++ if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] ++ && i == mode_lib->vba.soc.num_states) ++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ ++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); ++ if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] ++ && i == mode_lib->vba.soc.num_states) ++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { ++ locals->ODMCombineEnablePerState[i][k] = false; ++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; ++ } else { ++ locals->ODMCombineEnablePerState[i][k] = true; ++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; ++ } ++ if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity ++ && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] ++ && locals->ODMCombineEnablePerState[i][k] == false) { ++ locals->NoOfDPP[i][j][k] = 1; ++ locals->RequiredDPPCLK[i][j][k] = ++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ } else { ++ locals->NoOfDPP[i][j][k] = 2; ++ locals->RequiredDPPCLK[i][j][k] = ++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; ++ 
} ++ locals->RequiredDISPCLK[i][j] = dml_max( ++ locals->RequiredDISPCLK[i][j], ++ mode_lib->vba.PlaneRequiredDISPCLK); ++ if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) ++ || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { ++ locals->DISPCLK_DPPCLK_Support[i][j] = false; ++ } ++ } ++ locals->TotalNumberOfActiveDPP[i][j] = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) ++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; ++ if (j == 1) { ++ while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP ++ && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { ++ double BWOfNonSplitPlaneOfMaximumBandwidth; ++ unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; ++ ++ BWOfNonSplitPlaneOfMaximumBandwidth = 0; ++ NumberOfNonSplitPlaneOfMaximumBandwidth = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { ++ BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; ++ NumberOfNonSplitPlaneOfMaximumBandwidth = k; ++ } ++ } ++ locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; ++ locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = ++ locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; ++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; ++ } ++ } ++ if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { ++ locals->RequiredDISPCLK[i][j] = 0.0; ++ locals->DISPCLK_DPPCLK_Support[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ 
locals->ODMCombineEnablePerState[i][k] = false; ++ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { ++ locals->NoOfDPP[i][j][k] = 1; ++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ } else { ++ locals->NoOfDPP[i][j][k] = 2; ++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; ++ } ++ if (i != mode_lib->vba.soc.num_states) { ++ mode_lib->vba.PlaneRequiredDISPCLK = ++ mode_lib->vba.PixelClock[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); ++ } else { ++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ } ++ locals->RequiredDISPCLK[i][j] = dml_max( ++ locals->RequiredDISPCLK[i][j], ++ mode_lib->vba.PlaneRequiredDISPCLK); ++ if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity ++ || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) ++ locals->DISPCLK_DPPCLK_Support[i][j] = false; ++ } ++ locals->TotalNumberOfActiveDPP[i][j] = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) ++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; ++ } ++ locals->RequiredDISPCLK[i][j] = dml_max( ++ locals->RequiredDISPCLK[i][j], ++ mode_lib->vba.WritebackRequiredDISPCLK); ++ if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity ++ < mode_lib->vba.WritebackRequiredDISPCLK) { ++ locals->DISPCLK_DPPCLK_Support[i][j] = false; ++ } ++ } ++ } ++ /*Viewport Size Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->ViewportSizeSupport[i] = true; ++ for (k 
= 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->ODMCombineEnablePerState[i][k] == true) { ++ if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) ++ > locals->MaximumSwathWidth[k]) { ++ locals->ViewportSizeSupport[i] = false; ++ } ++ } else { ++ if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { ++ locals->ViewportSizeSupport[i] = false; ++ } ++ } ++ } ++ } ++ /*Total Available Pipes Support Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) ++ locals->TotalAvailablePipesSupport[i][j] = true; ++ else ++ locals->TotalAvailablePipesSupport[i][j] = false; ++ } ++ } ++ /*Total Available OTG Support Check*/ ++ ++ mode_lib->vba.TotalNumberOfActiveOTG = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG ++ + 1.0; ++ } ++ } ++ if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { ++ mode_lib->vba.NumberOfOTGSupport = true; ++ } else { ++ mode_lib->vba.NumberOfOTGSupport = false; ++ } ++ /*Display IO and DSC Support Check*/ ++ ++ mode_lib->vba.NonsupportedDSCInputBPC = false; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 ++ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 ++ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { ++ mode_lib->vba.NonsupportedDSCInputBPC = true; ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->RequiresDSC[i][k] = 0; ++ locals->RequiresFEC[i][k] = 0; ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if (mode_lib->vba.Output[k] == dm_hdmi) { ++ locals->RequiresDSC[i][k] = 0; ++ 
locals->RequiresFEC[i][k] = 0; ++ locals->OutputBppPerState[i][k] = TruncToValidBPP( ++ dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, ++ false, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ } else if (mode_lib->vba.Output[k] == dm_dp ++ || mode_lib->vba.Output[k] == dm_edp) { ++ if (mode_lib->vba.Output[k] == dm_edp) { ++ mode_lib->vba.EffectiveFECOverhead = 0.0; ++ } else { ++ mode_lib->vba.EffectiveFECOverhead = ++ mode_lib->vba.FECOverhead; ++ } ++ if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { ++ mode_lib->vba.Outbpp = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ false, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ mode_lib->vba.OutbppDSC = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ true, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ if (mode_lib->vba.DSCEnabled[k] == true) { ++ locals->RequiresDSC[i][k] = true; ++ if (mode_lib->vba.Output[k] == dm_dp) { ++ locals->RequiresFEC[i][k] = true; ++ } else { ++ locals->RequiresFEC[i][k] = false; ++ } ++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; ++ } else { ++ locals->RequiresDSC[i][k] = false; ++ locals->RequiresFEC[i][k] = false; ++ } ++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; ++ } ++ if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { ++ mode_lib->vba.Outbpp = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ false, ++ 
mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ mode_lib->vba.OutbppDSC = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ true, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ if (mode_lib->vba.DSCEnabled[k] == true) { ++ locals->RequiresDSC[i][k] = true; ++ if (mode_lib->vba.Output[k] == dm_dp) { ++ locals->RequiresFEC[i][k] = true; ++ } else { ++ locals->RequiresFEC[i][k] = false; ++ } ++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; ++ } else { ++ locals->RequiresDSC[i][k] = false; ++ locals->RequiresFEC[i][k] = false; ++ } ++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; ++ } ++ if (mode_lib->vba.Outbpp == BPP_INVALID ++ && mode_lib->vba.PHYCLKPerState[i] ++ >= 810.0) { ++ mode_lib->vba.Outbpp = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ false, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ mode_lib->vba.OutbppDSC = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ true, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { ++ locals->RequiresDSC[i][k] = true; ++ if (mode_lib->vba.Output[k] == dm_dp) { ++ locals->RequiresFEC[i][k] = true; ++ } else { ++ locals->RequiresFEC[i][k] = false; ++ } ++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; ++ } else { ++ locals->RequiresDSC[i][k] = false; ++ 
locals->RequiresFEC[i][k] = false; ++ } ++ locals->OutputBppPerState[i][k] = ++ mode_lib->vba.Outbpp; ++ } ++ } ++ } else { ++ locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; ++ } ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->DIOSupport[i] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->OutputBppPerState[i][k] == BPP_INVALID ++ || (mode_lib->vba.OutputFormat[k] == dm_420 ++ && mode_lib->vba.Interlace[k] == true ++ && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) { ++ locals->DIOSupport[i] = false; ++ } ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->DSCCLKRequiredMoreThanSupported[i] = false; ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if ((mode_lib->vba.Output[k] == dm_dp ++ || mode_lib->vba.Output[k] == dm_edp)) { ++ if (mode_lib->vba.OutputFormat[k] == dm_420 ++ || mode_lib->vba.OutputFormat[k] ++ == dm_n422) { ++ mode_lib->vba.DSCFormatFactor = 2; ++ } else { ++ mode_lib->vba.DSCFormatFactor = 1; ++ } ++ if (locals->RequiresDSC[i][k] == true) { ++ if (locals->ODMCombineEnablePerState[i][k] ++ == true) { ++ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor ++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { ++ locals->DSCCLKRequiredMoreThanSupported[i] = ++ true; ++ } ++ } else { ++ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor ++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { ++ locals->DSCCLKRequiredMoreThanSupported[i] = ++ true; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->NotEnoughDSCUnits[i] = false; ++ mode_lib->vba.TotalDSCUnitsRequired = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->RequiresDSC[i][k] == true) { 
++ if (locals->ODMCombineEnablePerState[i][k] == true) { ++ mode_lib->vba.TotalDSCUnitsRequired = ++ mode_lib->vba.TotalDSCUnitsRequired + 2.0; ++ } else { ++ mode_lib->vba.TotalDSCUnitsRequired = ++ mode_lib->vba.TotalDSCUnitsRequired + 1.0; ++ } ++ } ++ } ++ if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { ++ locals->NotEnoughDSCUnits[i] = true; ++ } ++ } ++ /*DSC Delay per state*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] != k) { ++ mode_lib->vba.slices = 0; ++ } else if (locals->RequiresDSC[i][k] == 0 ++ || locals->RequiresDSC[i][k] == false) { ++ mode_lib->vba.slices = 0; ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { ++ mode_lib->vba.slices = dml_ceil( ++ mode_lib->vba.PixelClockBackEnd[k] / 400.0, ++ 4.0); ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { ++ mode_lib->vba.slices = 8.0; ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { ++ mode_lib->vba.slices = 4.0; ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { ++ mode_lib->vba.slices = 2.0; ++ } else { ++ mode_lib->vba.slices = 1.0; ++ } ++ if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE ++ || locals->OutputBppPerState[i][k] == BPP_INVALID) { ++ mode_lib->vba.bpp = 0.0; ++ } else { ++ mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; ++ } ++ if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { ++ if (locals->ODMCombineEnablePerState[i][k] == false) { ++ locals->DSCDelayPerState[i][k] = ++ dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ mode_lib->vba.bpp, ++ dml_ceil( ++ mode_lib->vba.HActive[k] ++ / mode_lib->vba.slices, ++ 1.0), ++ mode_lib->vba.slices, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay( ++ mode_lib->vba.OutputFormat[k]); ++ } else { ++ locals->DSCDelayPerState[i][k] = ++ 2.0 * (dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ 
mode_lib->vba.bpp, ++ dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), ++ mode_lib->vba.slices / 2, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay(mode_lib->vba.OutputFormat[k])); ++ } ++ locals->DSCDelayPerState[i][k] = ++ locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; ++ } else { ++ locals->DSCDelayPerState[i][k] = 0.0; ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { ++ for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) ++ locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; ++ } ++ } ++ } ++ } ++ ++ //Prefetch Check ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->ODMCombineEnablePerState[i][k] == true) ++ locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); ++ else ++ locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; ++ locals->SwathWidthGranularityY = 256 / dml_ceil(locals->BytePerPixelInDETY[k], 1) / locals->MaxSwathHeightY[k]; ++ locals->RoundedUpMaxSwathSizeBytesY = (dml_ceil(locals->SwathWidthYPerState[i][j][k] - 1, locals->SwathWidthGranularityY) ++ + locals->SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; ++ if (locals->SourcePixelFormat[k] == dm_420_10) { ++ locals->RoundedUpMaxSwathSizeBytesY = dml_ceil(locals->RoundedUpMaxSwathSizeBytesY, 256) + 256; ++ } ++ if (locals->MaxSwathHeightC[k] > 0) { ++ locals->SwathWidthGranularityC = 256 / dml_ceil(locals->BytePerPixelInDETC[k], 2) / locals->MaxSwathHeightC[k]; ++ ++ locals->RoundedUpMaxSwathSizeBytesC = 
(dml_ceil(locals->SwathWidthYPerState[i][j][k] / 2 - 1, locals->SwathWidthGranularityC) ++ + locals->SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; ++ } ++ if (locals->SourcePixelFormat[k] == dm_420_10) { ++ locals->RoundedUpMaxSwathSizeBytesC = dml_ceil(locals->RoundedUpMaxSwathSizeBytesC, 256) + 256; ++ } else { ++ locals->RoundedUpMaxSwathSizeBytesC = 0; ++ } ++ ++ if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte * 1024 / 2) { ++ locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k]; ++ locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k]; ++ } else { ++ locals->SwathHeightYPerState[i][j][k] = locals->MinSwathHeightY[k]; ++ locals->SwathHeightCPerState[i][j][k] = locals->MinSwathHeightC[k]; ++ } ++ ++ if (locals->BytePerPixelInDETC[k] == 0) { ++ locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; ++ locals->LinesInDETChroma = 0; ++ } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) { ++ locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETY[k] / ++ locals->SwathWidthYPerState[i][j][k]; ++ locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2); ++ } else { ++ locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; ++ locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2); ++ } ++ ++ locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines, ++ dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] / (locals->SwathWidthYPerState[i][j][k] ++ / dml_max(locals->HRatio[k], 1)), 1)) - 
(locals->vtaps[k] - 1); ++ ++ locals->EffectiveLBLatencyHidingSourceLinesChroma = dml_min(locals->MaxLineBufferLines, ++ dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] ++ / (locals->SwathWidthYPerState[i][j][k] / 2 ++ / dml_max(locals->HRatio[k] / 2, 1)), 1)) - (locals->VTAPsChroma[k] - 1); ++ ++ locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( ++ locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * ++ locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], ++ locals->EffectiveLBLatencyHidingSourceLinesLuma), ++ locals->SwathHeightYPerState[i][j][k]); ++ ++ locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( ++ locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * ++ locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], ++ locals->EffectiveLBLatencyHidingSourceLinesChroma), ++ locals->SwathHeightCPerState[i][j][k]); ++ ++ if (locals->BytePerPixelInDETC[k] == 0) { ++ locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) ++ / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * ++ dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); ++ } else { ++ locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( ++ locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) ++ / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * ++ dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), ++ locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - ++ locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * ++ dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / 
locals->NoOfDPP[i][j][k])); ++ } ++ } ++ } ++ } ++ ++ for (i = 0; i <= locals->soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ locals->UrgentLatencySupport[i][j] = true; ++ for (k = 0; k < locals->NumberOfActivePlanes; k++) { ++ if (locals->UrgentLatencySupportUsPerState[i][j][k] < locals->UrgentLatency) ++ locals->UrgentLatencySupport[i][j] = false; ++ } ++ } ++ } ++ ++ ++ /*Prefetch Check*/ ++ for (i = 0; i <= locals->soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ locals->TotalNumberOfDCCActiveDPP[i][j] = 0; ++ for (k = 0; k < locals->NumberOfActivePlanes; k++) { ++ if (locals->DCCEnable[k] == true) { ++ locals->TotalNumberOfDCCActiveDPP[i][j] = ++ locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; ++ } ++ } ++ } ++ } ++ ++ CalculateMinAndMaxPrefetchMode(locals->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &locals->MinPrefetchMode, &locals->MaxPrefetchMode); ++ ++ locals->MaxTotalVActiveRDBandwidth = 0; ++ for (k = 0; k < locals->NumberOfActivePlanes; k++) { ++ locals->MaxTotalVActiveRDBandwidth = locals->MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; ++ } ++ ++ for (i = 0; i <= locals->soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ for (k = 0; k < locals->NumberOfActivePlanes; k++) { ++ locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; ++ locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; ++ locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; ++ locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; ++ locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ mode_lib->vba.PixelClock[k] / 16.0); ++ if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { ++ if (mode_lib->vba.VRatio[k] <= 1.0) { ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = ++ dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ 1.1 ++ * dml_ceil( ++ 
mode_lib->vba.BytePerPixelInDETY[k], ++ 1.0) ++ / 64.0 ++ * mode_lib->vba.HRatio[k] ++ * mode_lib->vba.PixelClock[k] ++ / mode_lib->vba.NoOfDPP[i][j][k]); ++ } else { ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = ++ dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ 1.1 ++ * dml_ceil( ++ mode_lib->vba.BytePerPixelInDETY[k], ++ 1.0) ++ / 64.0 ++ * mode_lib->vba.PSCL_FACTOR[k] ++ * mode_lib->vba.RequiredDPPCLK[i][j][k]); ++ } ++ } else { ++ if (mode_lib->vba.VRatio[k] <= 1.0) { ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = ++ dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ 1.1 ++ * dml_ceil( ++ mode_lib->vba.BytePerPixelInDETY[k], ++ 1.0) ++ / 32.0 ++ * mode_lib->vba.HRatio[k] ++ * mode_lib->vba.PixelClock[k] ++ / mode_lib->vba.NoOfDPP[i][j][k]); ++ } else { ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = ++ dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ 1.1 ++ * dml_ceil( ++ mode_lib->vba.BytePerPixelInDETY[k], ++ 1.0) ++ / 32.0 ++ * mode_lib->vba.PSCL_FACTOR[k] ++ * mode_lib->vba.RequiredDPPCLK[i][j][k]); ++ } ++ if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = ++ dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ 1.1 ++ * dml_ceil( ++ mode_lib->vba.BytePerPixelInDETC[k], ++ 2.0) ++ / 32.0 ++ * mode_lib->vba.HRatio[k] ++ / 2.0 ++ * mode_lib->vba.PixelClock[k] ++ / mode_lib->vba.NoOfDPP[i][j][k]); ++ } else { ++ mode_lib->vba.ProjectedDCFCLKDeepSleep = ++ dml_max( ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ 1.1 ++ * dml_ceil( ++ mode_lib->vba.BytePerPixelInDETC[k], ++ 2.0) ++ / 32.0 ++ * mode_lib->vba.PSCL_FACTOR_CHROMA[k] ++ * mode_lib->vba.RequiredDPPCLK[i][j][k]); ++ } ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.Read256BlockHeightY[k], ++ mode_lib->vba.Read256BlockWidthY[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ 
mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k], ++ mode_lib->vba.ViewportHeight[k], ++ mode_lib->vba.SwathWidthYPerState[i][j][k], ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.VMMPageSize, ++ mode_lib->vba.PTEBufferSizeInRequestsLuma, ++ mode_lib->vba.PDEProcessingBufIn64KBReqs, ++ mode_lib->vba.PitchY[k], ++ mode_lib->vba.DCCMetaPitchY[k], ++ &mode_lib->vba.MacroTileWidthY[k], ++ &mode_lib->vba.MetaRowBytesY, ++ &mode_lib->vba.DPTEBytesPerRowY, ++ &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], ++ &mode_lib->vba.dpte_row_height[k], ++ &mode_lib->vba.meta_row_height[k]); ++ mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.vtaps[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.SwathHeightYPerState[i][j][k], ++ mode_lib->vba.ViewportYStartY[k], ++ &mode_lib->vba.PrefillY[k], ++ &mode_lib->vba.MaxNumSwY[k]); ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.Read256BlockHeightY[k], ++ mode_lib->vba.Read256BlockWidthY[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k] / 2.0, ++ mode_lib->vba.ViewportHeight[k] / 2.0, ++ mode_lib->vba.SwathWidthYPerState[i][j][k] / 2.0, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.VMMPageSize, ++ mode_lib->vba.PTEBufferSizeInRequestsLuma, ++ mode_lib->vba.PDEProcessingBufIn64KBReqs, ++ 
mode_lib->vba.PitchC[k], ++ 0.0, ++ &mode_lib->vba.MacroTileWidthC[k], ++ &mode_lib->vba.MetaRowBytesC, ++ &mode_lib->vba.DPTEBytesPerRowC, ++ &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], ++ &mode_lib->vba.dpte_row_height_chroma[k], ++ &mode_lib->vba.meta_row_height_chroma[k]); ++ mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k] / 2.0, ++ mode_lib->vba.VTAPsChroma[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.SwathHeightCPerState[i][j][k], ++ mode_lib->vba.ViewportYStartC[k], ++ &mode_lib->vba.PrefillC[k], ++ &mode_lib->vba.MaxNumSwC[k]); ++ } else { ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; ++ mode_lib->vba.MetaRowBytesC = 0.0; ++ mode_lib->vba.DPTEBytesPerRowC = 0.0; ++ locals->PrefetchLinesC[k] = 0.0; ++ locals->PTEBufferSizeNotExceededC[i][j][k] = true; ++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; ++ } ++ locals->PDEAndMetaPTEBytesPerFrame[k] = ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; ++ locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; ++ locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; ++ ++ CalculateActiveRowBandwidth( ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.MetaRowBytesY, ++ mode_lib->vba.MetaRowBytesC, ++ mode_lib->vba.meta_row_height[k], ++ mode_lib->vba.meta_row_height_chroma[k], ++ mode_lib->vba.DPTEBytesPerRowY, ++ mode_lib->vba.DPTEBytesPerRowC, ++ mode_lib->vba.dpte_row_height[k], ++ mode_lib->vba.dpte_row_height_chroma[k], ++ &mode_lib->vba.meta_row_bw[k], ++ &mode_lib->vba.dpte_row_bw[k], ++ &mode_lib->vba.qual_row_bw[k]); ++ } ++ 
mode_lib->vba.ExtraLatency = ++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] ++ + (mode_lib->vba.TotalNumberOfActiveDPP[i][j] ++ * mode_lib->vba.PixelChunkSizeInKByte ++ + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] ++ * mode_lib->vba.MetaChunkSize) ++ * 1024.0 ++ / mode_lib->vba.ReturnBWPerState[i]; ++ if (mode_lib->vba.GPUVMEnable == true) { ++ mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency ++ + mode_lib->vba.TotalNumberOfActiveDPP[i][j] ++ * mode_lib->vba.PTEGroupSize ++ / mode_lib->vba.ReturnBWPerState[i]; ++ } ++ mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency ++ + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; ++ } else { ++ locals->WritebackDelay[i][k] = 0.0; ++ } ++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { ++ if (mode_lib->vba.BlendingAndTiming[m] == k ++ && mode_lib->vba.WritebackEnable[m] ++ == true) { ++ locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], ++ mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[m], ++ mode_lib->vba.WritebackHRatio[m], ++ mode_lib->vba.WritebackVRatio[m], ++ mode_lib->vba.WritebackLumaHTaps[m], ++ mode_lib->vba.WritebackLumaVTaps[m], ++ mode_lib->vba.WritebackChromaHTaps[m], ++ mode_lib->vba.WritebackChromaVTaps[m], ++ mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); ++ } ++ } ++ } ++ } ++ for (k = 
0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == m) { ++ locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; ++ } ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ for (m = 0; m < locals->NumberOfCursors[k]; m++) ++ locals->cursor_bw[k] = locals->NumberOfCursors[k] * locals->CursorWidth[k][m] * locals->CursorBPP[k][m] ++ / 8 / (locals->HTotal[k] / locals->PixelClock[k]) * locals->VRatio[k]; ++ } ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] ++ - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); ++ } ++ ++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; ++ do { ++ mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; ++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; ++ ++ mode_lib->vba.TWait = CalculateTWait( ++ mode_lib->vba.PrefetchMode[i][j], ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.SREnterPlusExitTime); ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ ++ if (mode_lib->vba.XFCEnabled[k] == true) { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = ++ CalculateRemoteSurfaceFlipDelay( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ locals->SwathWidthYPerState[i][j][k], ++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0), ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.XFCTSlvVupdateOffset, ++ mode_lib->vba.XFCTSlvVupdateWidth, ++ mode_lib->vba.XFCTSlvVreadyOffset, ++ mode_lib->vba.XFCXBUFLatencyTolerance, ++ mode_lib->vba.XFCFillBWOverhead, ++ mode_lib->vba.XFCSlvChunkSize, ++ mode_lib->vba.XFCBusTransportTime, ++ mode_lib->vba.TimeCalc, ++ mode_lib->vba.TWait, ++ &mode_lib->vba.SrcActiveDrainRate, ++ 
&mode_lib->vba.TInitXFill, ++ &mode_lib->vba.TslvChk); ++ } else { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; ++ } ++ ++ CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, ++ mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], ++ mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], ++ mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, ++ mode_lib->vba.SwathWidthYPerState[i][j][k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], ++ mode_lib->vba.SwathWidthYSingleDPP[k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.SwathHeightYThisState[k], mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.Interlace[k], mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); ++ ++ mode_lib->vba.IsErrorResult[i][j][k] = ++ CalculatePrefetchSchedule( ++ mode_lib, ++ mode_lib->vba.RequiredDPPCLK[i][j][k], ++ mode_lib->vba.RequiredDISPCLK[i][j], ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ mode_lib->vba.NoOfDPP[i][j][k], ++ mode_lib->vba.NumberOfCursors[k], ++ mode_lib->vba.VTotal[k] ++ - mode_lib->vba.VActive[k], ++ mode_lib->vba.HTotal[k], ++ mode_lib->vba.MaxInterDCNTileRepeaters, ++ mode_lib->vba.MaximumVStartup[k], ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.DynamicMetadataEnable[k], ++ 
mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], ++ mode_lib->vba.DynamicMetadataTransmittedBytes[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.ExtraLatency, ++ mode_lib->vba.TimeCalc, ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], ++ mode_lib->vba.MetaRowBytes[k], ++ mode_lib->vba.DPTEBytesPerRow[k], ++ mode_lib->vba.PrefetchLinesY[k], ++ mode_lib->vba.SwathWidthYPerState[i][j][k], ++ mode_lib->vba.BytePerPixelInDETY[k], ++ mode_lib->vba.PrefillY[k], ++ mode_lib->vba.MaxNumSwY[k], ++ mode_lib->vba.PrefetchLinesC[k], ++ mode_lib->vba.BytePerPixelInDETC[k], ++ mode_lib->vba.PrefillC[k], ++ mode_lib->vba.MaxNumSwC[k], ++ mode_lib->vba.SwathHeightYPerState[i][j][k], ++ mode_lib->vba.SwathHeightCPerState[i][j][k], ++ mode_lib->vba.TWait, ++ mode_lib->vba.XFCEnabled[k], ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay, ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.DSTXAfterScaler[k], ++ mode_lib->vba.DSTYAfterScaler[k], ++ &mode_lib->vba.LineTimesForPrefetch[k], ++ &mode_lib->vba.PrefetchBW[k], ++ &mode_lib->vba.LinesForMetaPTE[k], ++ &mode_lib->vba.LinesForMetaAndDPTERow[k], ++ &mode_lib->vba.VRatioPreY[i][j][k], ++ &mode_lib->vba.VRatioPreC[i][j][k], ++ &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k], ++ &mode_lib->vba.Tno_bw[k], ++ &mode_lib->vba.VUpdateOffsetPix[k], ++ &mode_lib->vba.VUpdateWidthPix[k], ++ &mode_lib->vba.VReadyOffsetPix[k]); ++ } ++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; ++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; ++ locals->prefetch_vm_bw_valid = true; ++ locals->prefetch_row_bw_valid = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) ++ locals->prefetch_vm_bw[k] = 0; ++ else if (locals->LinesForMetaPTE[k] > 0) ++ locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] ++ / (locals->LinesForMetaPTE[k] * locals->HTotal[k] 
/ locals->PixelClock[k]); ++ else { ++ locals->prefetch_vm_bw[k] = 0; ++ locals->prefetch_vm_bw_valid = false; ++ } ++ if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) ++ locals->prefetch_row_bw[k] = 0; ++ else if (locals->LinesForMetaAndDPTERow[k] > 0) ++ locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) ++ / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); ++ else { ++ locals->prefetch_row_bw[k] = 0; ++ locals->prefetch_row_bw_valid = false; ++ } ++ ++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch ++ + mode_lib->vba.cursor_bw[k] + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]; ++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = ++ mode_lib->vba.MaximumReadBandwidthWithPrefetch ++ + mode_lib->vba.cursor_bw[k] ++ + dml_max3( ++ mode_lib->vba.prefetch_vm_bw[k], ++ mode_lib->vba.prefetch_row_bw[k], ++ dml_max(mode_lib->vba.ReadBandwidth[k], ++ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) ++ + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); ++ } ++ locals->BandwidthWithoutPrefetchSupported[i] = true; ++ if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { ++ locals->BandwidthWithoutPrefetchSupported[i] = false; ++ } ++ ++ locals->PrefetchSupported[i][j] = true; ++ if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { ++ locals->PrefetchSupported[i][j] = false; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->LineTimesForPrefetch[k] < 2.0 ++ || locals->LinesForMetaPTE[k] >= 8.0 ++ || locals->LinesForMetaAndDPTERow[k] >= 16.0 ++ || mode_lib->vba.IsErrorResult[i][j][k] == true) { ++ locals->PrefetchSupported[i][j] = false; ++ } ++ } ++ locals->VRatioInPrefetchSupported[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if 
(locals->VRatioPreY[i][j][k] > 4.0 ++ || locals->VRatioPreC[i][j][k] > 4.0 ++ || mode_lib->vba.IsErrorResult[i][j][k] == true) { ++ locals->VRatioInPrefetchSupported[i][j] = false; ++ } ++ } ++ } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) ++ && mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode); ++ ++ if (mode_lib->vba.PrefetchSupported[i][j] == true ++ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = ++ mode_lib->vba.ReturnBWPerState[i]; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = ++ mode_lib->vba.BandwidthAvailableForImmediateFlip ++ - mode_lib->vba.cursor_bw[k] ++ - dml_max( ++ mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.qual_row_bw[k], ++ mode_lib->vba.PrefetchBW[k]); ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.ImmediateFlipBytes[k] = 0.0; ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { ++ mode_lib->vba.ImmediateFlipBytes[k] = ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] ++ + mode_lib->vba.MetaRowBytes[k] ++ + mode_lib->vba.DPTEBytesPerRow[k]; ++ } ++ } ++ mode_lib->vba.TotImmediateFlipBytes = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { ++ mode_lib->vba.TotImmediateFlipBytes = ++ mode_lib->vba.TotImmediateFlipBytes ++ + mode_lib->vba.ImmediateFlipBytes[k]; ++ } ++ } ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ CalculateFlipSchedule( ++ mode_lib, ++ mode_lib->vba.ExtraLatency, ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.BandwidthAvailableForImmediateFlip, ++ 
mode_lib->vba.TotImmediateFlipBytes, ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.ImmediateFlipBytes[k], ++ mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.Tno_bw[k], ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], ++ mode_lib->vba.MetaRowBytes[k], ++ mode_lib->vba.DPTEBytesPerRow[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.dpte_row_height[k], ++ mode_lib->vba.meta_row_height[k], ++ mode_lib->vba.qual_row_bw[k], ++ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], ++ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], ++ &mode_lib->vba.final_flip_bw[k], ++ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); ++ } ++ mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.total_dcn_read_bw_with_flip = ++ mode_lib->vba.total_dcn_read_bw_with_flip ++ + mode_lib->vba.cursor_bw[k] ++ + dml_max3( ++ mode_lib->vba.prefetch_vm_bw[k], ++ mode_lib->vba.prefetch_row_bw[k], ++ mode_lib->vba.final_flip_bw[k] ++ + dml_max( ++ mode_lib->vba.ReadBandwidth[k], ++ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])); ++ } ++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; ++ if (mode_lib->vba.total_dcn_read_bw_with_flip ++ > mode_lib->vba.ReturnBWPerState[i]) { ++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { ++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; ++ } ++ } ++ } else { ++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; ++ } ++ } ++ } ++ ++ /*Vertical Active BW support*/ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * ++ mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) 
* ++ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; ++ if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) ++ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; ++ else ++ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; ++ } ++ ++ /*PTE Buffer Size Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ locals->PTEBufferSizeNotExceeded[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->PTEBufferSizeNotExceededY[i][j][k] == false ++ || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { ++ locals->PTEBufferSizeNotExceeded[i][j] = false; ++ } ++ } ++ } ++ } ++ /*Cursor Support Check*/ ++ mode_lib->vba.CursorSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ for (j = 0; j < 2; j++) { ++ if (mode_lib->vba.CursorWidth[k][j] > 0.0) { ++ if (dml_floor( ++ dml_floor( ++ mode_lib->vba.CursorBufferSize ++ - mode_lib->vba.CursorChunkSize, ++ mode_lib->vba.CursorChunkSize) * 1024.0 ++ / (mode_lib->vba.CursorWidth[k][j] ++ * mode_lib->vba.CursorBPP[k][j] ++ / 8.0), ++ 1.0) ++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatencyPixelDataOnly ++ || (mode_lib->vba.CursorBPP[k][j] == 64.0 ++ && mode_lib->vba.Cursor64BppSupport == false)) { ++ mode_lib->vba.CursorSupport = false; ++ } ++ } ++ } ++ } ++ /*Valid Pitch Check*/ ++ ++ mode_lib->vba.PitchSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->AlignedYPitch[k] = dml_ceil( ++ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), ++ locals->MacroTileWidthY[k]); ++ if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { ++ mode_lib->vba.PitchSupport = false; ++ } ++ if (mode_lib->vba.DCCEnable[k] == true) { ++ locals->AlignedDCCMetaPitch[k] = dml_ceil( ++ dml_max( ++ 
mode_lib->vba.DCCMetaPitchY[k], ++ mode_lib->vba.ViewportWidth[k]), ++ 64.0 * locals->Read256BlockWidthY[k]); ++ } else { ++ locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; ++ } ++ if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { ++ mode_lib->vba.PitchSupport = false; ++ } ++ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { ++ locals->AlignedCPitch[k] = dml_ceil( ++ dml_max( ++ mode_lib->vba.PitchC[k], ++ mode_lib->vba.ViewportWidth[k] / 2.0), ++ locals->MacroTileWidthC[k]); ++ } else { ++ locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; ++ } ++ if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { ++ mode_lib->vba.PitchSupport = false; ++ } ++ } ++ /*Mode Support, Voltage State and SOC Configuration*/ ++ ++ for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { ++ for (j = 0; j < 2; j++) { ++ enum dm_validation_status status = DML_VALIDATION_OK; ++ ++ if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { ++ status = DML_FAIL_SCALE_RATIO_TAP; ++ } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { ++ status = DML_FAIL_SOURCE_PIXEL_FORMAT; ++ } else if (locals->ViewportSizeSupport[i] != true) { ++ status = DML_FAIL_VIEWPORT_SIZE; ++ } else if (locals->DIOSupport[i] != true) { ++ status = DML_FAIL_DIO_SUPPORT; ++ } else if (locals->NotEnoughDSCUnits[i] != false) { ++ status = DML_FAIL_NOT_ENOUGH_DSC; ++ } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { ++ status = DML_FAIL_DSC_CLK_REQUIRED; ++ } else if (locals->UrgentLatencySupport[i][j] != true) { ++ status = DML_FAIL_URGENT_LATENCY; ++ } else if (locals->ROBSupport[i] != true) { ++ status = DML_FAIL_REORDERING_BUFFER; ++ } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { ++ status = DML_FAIL_DISPCLK_DPPCLK; ++ } 
else if (locals->TotalAvailablePipesSupport[i][j] != true) { ++ status = DML_FAIL_TOTAL_AVAILABLE_PIPES; ++ } else if (mode_lib->vba.NumberOfOTGSupport != true) { ++ status = DML_FAIL_NUM_OTG; ++ } else if (mode_lib->vba.WritebackModeSupport != true) { ++ status = DML_FAIL_WRITEBACK_MODE; ++ } else if (mode_lib->vba.WritebackLatencySupport != true) { ++ status = DML_FAIL_WRITEBACK_LATENCY; ++ } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { ++ status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; ++ } else if (mode_lib->vba.CursorSupport != true) { ++ status = DML_FAIL_CURSOR_SUPPORT; ++ } else if (mode_lib->vba.PitchSupport != true) { ++ status = DML_FAIL_PITCH_SUPPORT; ++ } else if (locals->PrefetchSupported[i][j] != true) { ++ status = DML_FAIL_PREFETCH_SUPPORT; ++ } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { ++ status = DML_FAIL_TOTAL_V_ACTIVE_BW; ++ } else if (locals->VRatioInPrefetchSupported[i][j] != true) { ++ status = DML_FAIL_V_RATIO_PREFETCH; ++ } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { ++ status = DML_FAIL_PTE_BUFFER_SIZE; ++ } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { ++ status = DML_FAIL_DSC_INPUT_BPC; ++ } ++ ++ if (status == DML_VALIDATION_OK) { ++ locals->ModeSupport[i][j] = true; ++ } else { ++ locals->ModeSupport[i][j] = false; ++ } ++ locals->ValidationStatus[i] = status; ++ } ++ } ++ { ++ unsigned int MaximumMPCCombine = 0; ++ mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; ++ for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { ++ if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { ++ mode_lib->vba.VoltageLevel = i; ++ if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false ++ || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible)) { ++ MaximumMPCCombine = 1; ++ } else { ++ MaximumMPCCombine = 0; ++ } ++ break; ++ } ++ } ++ mode_lib->vba.ImmediateFlipSupport = ++ 
locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; ++ locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; ++ } ++ mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; ++ mode_lib->vba.maxMpcComb = MaximumMPCCombine; ++ } ++ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ mode_lib->vba.ODMCombineEnabled[k] = ++ locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; ++ } else { ++ mode_lib->vba.ODMCombineEnabled[k] = 0; ++ } ++ mode_lib->vba.DSCEnabled[k] = ++ locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; ++ mode_lib->vba.OutputBpp[k] = ++ locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; ++ } ++} +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h +new file mode 100644 +index 000000000000..a989d3ca1e99 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h +@@ -0,0 +1,32 @@ ++/* ++ * Copyright 2018 Advanced Micro Devices, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: AMD ++ * ++ */ ++ ++#ifndef _DCN20V2_DISPLAY_MODE_VBA_H_ ++#define _DCN20V2_DISPLAY_MODE_VBA_H_ ++ ++void dml20v2_recalculate(struct display_mode_lib *mode_lib); ++void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); ++ ++#endif +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +new file mode 100644 +index 000000000000..ed8bf5f723c9 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +@@ -0,0 +1,1701 @@ ++/* ++ * Copyright 2018 Advanced Micro Devices, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: AMD ++ * ++ */ ++ ++#include "../display_mode_lib.h" ++#include "../display_mode_vba.h" ++#include "display_rq_dlg_calc_20v2.h" ++ ++// Function: dml20v2_rq_dlg_get_rq_params ++// Calculate requestor related parameters that are register definition agnostic ++// (i.e. this layer does try to separate real values from register definition) ++// Input: ++// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) ++// Output: ++// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.) 
//
static void dml20v2_rq_dlg_get_rq_params(
		struct display_mode_lib *mode_lib,
		display_rq_params_st * rq_param,
		const display_pipe_source_params_st pipe_src_param);

// Function: dml20v2_rq_dlg_get_dlg_params
//  Calculate deadline related parameters
//
static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
		const display_e2e_pipe_params_st *e2e_pipe_param,
		const unsigned int num_pipes,
		const unsigned int pipe_idx,
		display_dlg_regs_st *disp_dlg_regs,
		display_ttu_regs_st *disp_ttu_regs,
		const display_rq_dlg_params_st rq_dlg_param,
		const display_dlg_sys_params_st dlg_sys_param,
		const bool cstate_en,
		const bool pstate_en);
/*
 * NOTE:
 *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
 *
 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
 * ways. Unless there is something clearly wrong with it the code should
 * remain as-is as it provides us with a guarantee from HW that it is correct.
 */

// Forward declaration only; the definition is expected later in this file.
static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
		double *refcyc_per_req_delivery_pre_cur,
		double *refcyc_per_req_delivery_cur,
		double refclk_freq_in_mhz,
		double ref_freq_to_pix_freq,
		double hscale_pixel_rate_l,
		double hscl_ratio,
		double vratio_pre_l,
		double vratio_l,
		unsigned int cur_width,
		enum cursor_bpp cur_bpp);

#include "../dml_inline_defs.h"

// Function: get_bytes_per_element
//  Return the number of bytes per element for the requested plane of a
//  source format.
// Input:
//  source_format - surface format class
//  is_chroma     - true to query the chroma plane, false for luma
// Output:
//  0 when the format is not one of those listed below, and also for the
//  chroma plane of dm_444_16/dm_444_32/dm_444_64. Note that dm_444_8
//  returns 1 regardless of is_chroma.
//
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
	unsigned int ret_val = 0;

	if (source_format == dm_444_16) {
		if (!is_chroma)
			ret_val = 2;
	} else if (source_format == dm_444_32) {
		if (!is_chroma)
			ret_val = 4;
	} else if (source_format == dm_444_64) {
		if (!is_chroma)
			ret_val = 8;
	} else if (source_format == dm_420_8) {
		if (is_chroma)
			ret_val = 2;
		else
			ret_val = 1;
	} else if (source_format == dm_420_10) {
		if (is_chroma)
			ret_val = 4;
		else
			ret_val = 2;
	} else if (source_format == dm_444_8) {
		ret_val = 1;
	}
	return ret_val;
}

// Function: is_dual_plane
//  Return true for the two formats handled here as luma + chroma planes
//  (dm_420_8 and dm_420_10), false for everything else.
//
static bool is_dual_plane(enum source_format_class source_format)
{
	bool ret_val = 0;

	if ((source_format == dm_420_8) || (source_format == dm_420_10))
		ret_val = 1;

	return ret_val;
}

// Function: get_refcyc_per_delivery
//  Compute a delivery period expressed in reference clock cycles.
//   vratio <= 1.0 : refclk * width / pclk / req_per_swath_ub, where width is
//                   recout_width, or min(recout_width, hactive / 2) when
//                   odm_combine is set.
//   vratio  > 1.0 : refclk * delivery_width / hscale_pixel_rate / req_per_swath_ub.
//  The inputs and the result are also logged through dml_print.
//  NOTE(review): req_per_swath_ub, pclk_freq_in_mhz and hscale_pixel_rate are
//  used as divisors without a zero check - presumably guaranteed non-zero by
//  callers; confirm.
//
static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib,
		double refclk_freq_in_mhz,
		double pclk_freq_in_mhz,
		bool odm_combine,
		unsigned int recout_width,
		unsigned int hactive,
		double vratio,
		double hscale_pixel_rate,
		unsigned int delivery_width,
		unsigned int req_per_swath_ub)
{
	double refcyc_per_delivery = 0.0;

	if (vratio <= 1.0) {
		if (odm_combine)
			refcyc_per_delivery = (double) refclk_freq_in_mhz
					* dml_min((double) recout_width, (double) hactive / 2.0)
					/ pclk_freq_in_mhz / (double) req_per_swath_ub;
		else
			refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width
					/ pclk_freq_in_mhz / (double) req_per_swath_ub;
	} else {
		refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width
				/ (double) hscale_pixel_rate / (double) req_per_swath_ub;
	}

	dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
	dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
	dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
	dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
	dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
	dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);

	return refcyc_per_delivery;

}

// Function: get_blk_size_bytes
//  Map a macro tile size enum to a block size in bytes: 256KB for
//  dm_256k_tile, 64KB for dm_64k_tile, and 4KB for any other value.
//
static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
{
	if (tile_size == dm_256k_tile)
		return (256 * 1024);
	else if (tile_size == dm_64k_tile)
		return (64 * 1024);
	else
		return (4 * 1024);
}

// Function: extract_rq_sizing_regs
//  Convert the byte sizes in rq_sizing into the log2-based values stored in
//  rq_regs. Each field is dml_log2(bytes) minus a fixed per-field offset; the
//  two "min" fields are written as 0 when the corresponding byte count is 0.
// Input:
//  rq_sizing - chunk / meta chunk / pte group sizes in bytes (passed by value)
// Output:
//  rq_regs   - chunk_size, min_chunk_size, meta_chunk_size,
//              min_meta_chunk_size, dpte_group_size, mpte_group_size
//
static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib,
		display_data_rq_regs_st *rq_regs,
		const display_data_rq_sizing_params_st rq_sizing)
{
	dml_print("DML_DLG: %s: rq_sizing param\n", __func__);
	print__data_rq_sizing_params_st(mode_lib, rq_sizing);

	rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10;

	if (rq_sizing.min_chunk_bytes == 0)
		rq_regs->min_chunk_size = 0;
	else
		rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1;

	rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10;
	if (rq_sizing.min_meta_chunk_bytes == 0)
		rq_regs->min_meta_chunk_size = 0;
	else
		rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1;

	rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6;
	rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6;
}

// Function: extract_rq_regs
//  Fill rq_regs from the computed rq_param: per-plane sizing fields,
//  pte_row_height_linear, swath_height, the four expansion modes and
//  plane1_base_address.
//  The chroma sizing and pte_row_height_linear fields are only written when
//  rq_param.yuv420 is set; rq_regs_c.swath_height is written unconditionally.
//
static void extract_rq_regs(struct display_mode_lib *mode_lib,
		display_rq_regs_st *rq_regs,
		const display_rq_params_st rq_param)
{
	unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
	unsigned int detile_buf_plane1_addr = 0;

	extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l);

	rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height),
			1) - 3;

	if (rq_param.yuv420) {
		extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c);
		rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height),
				1) - 3;
	}

	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);

	// FIXME: take the max between luma, chroma chunk size?
	// okay for now, as we are setting chunk_bytes to 8kb anyways
	if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb
		rq_regs->drq_expansion_mode = 0;
	} else {
		rq_regs->drq_expansion_mode = 2;
	}
	rq_regs->prq_expansion_mode = 1;
	rq_regs->mrq_expansion_mode = 1;
	rq_regs->crq_expansion_mode = 1;

	// plane1 (chroma) base address is expressed in units of 64 bytes and stays
	// 0 unless the surface is yuv420.
	if (rq_param.yuv420) {
		if ((double) rq_param.misc.rq_l.stored_swath_bytes
				/ (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) {
			detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma
		} else {
			detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0),
					256,
					0) / 64.0; // 2/3 to chroma
		}
	}
	rq_regs->plane1_base_address = detile_buf_plane1_addr;
}

// Function: handle_det_buf_split
//  Decide the stored swath bytes and swath height for luma and chroma based
//  on whether two full swaths of each plane fit in the detile buffer
//  (mode_lib->ip.det_buffer_size_kbytes * 1024).
// Input:
//  rq_param->misc.rq_{l,c}.full_swath_bytes, blk256_height, blk256_width
//  rq_param->yuv420, rq_param->yuv420_10bpc
//  pipe_src_param.sw_mode, pipe_src_param.source_scan
// Output:
//  rq_param->misc.rq_{l,c}.stored_swath_bytes
//  rq_param->dlg.rq_{l,c}.swath_height
//
static void handle_det_buf_split(struct display_mode_lib *mode_lib,
		display_rq_params_st *rq_param,
		const display_pipe_source_params_st pipe_src_param)
{
	unsigned int total_swath_bytes = 0;
	unsigned int swath_bytes_l = 0;
	unsigned int swath_bytes_c = 0;
	unsigned int full_swath_bytes_packed_l = 0;
	unsigned int full_swath_bytes_packed_c = 0;
	bool req128_l = 0;
	bool req128_c = 0;
	bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
	bool surf_vert = (pipe_src_param.source_scan == dm_vert);
	unsigned int log2_swath_height_l = 0;
	unsigned int log2_swath_height_c = 0;
	unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;

	full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
	full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;

	// For 10bpc 4:2:0 the swath size is scaled by 2/3, rounded up to a
	// multiple of 256 and padded by another 256 bytes.
	if (rq_param->yuv420_10bpc) {
		full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3,
				256,
				1) + 256;
		full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3,
				256,
				1) + 256;
	}

	if (rq_param->yuv420) {
		total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;

		if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
			req128_l = 0;
			req128_c = 0;
			swath_bytes_l = full_swath_bytes_packed_l;
			swath_bytes_c = full_swath_bytes_packed_c;
		} else { //128b request (for luma only for yuv420 8bpc)
			req128_l = 1;
			req128_c = 0;
			swath_bytes_l = full_swath_bytes_packed_l / 2;
			swath_bytes_c = full_swath_bytes_packed_c;
		}
		// Note: assumption, the config that pass in will fit into
		//       the detiled buffer.
	} else {
		total_swath_bytes = 2 * full_swath_bytes_packed_l;

		if (total_swath_bytes <= detile_buf_size_in_bytes)
			req128_l = 0;
		else
			req128_l = 1;

		// NOTE(review): unlike the yuv420 branch, the stored luma size here is
		// the doubled total and is not halved when req128_l is set - kept as
		// provided by HW; confirm if this ever looks suspicious.
		swath_bytes_l = total_swath_bytes;
		swath_bytes_c = 0;
	}
	rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
	rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;

	// Swath height is 1 line for linear surfaces; otherwise it is the 256B
	// block height (or width when scanning vertically), halved when 128b
	// requests are used.
	if (surf_linear) {
		log2_swath_height_l = 0;
		log2_swath_height_c = 0;
	} else if (!surf_vert) {
		log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l;
		log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c;
	} else {
		log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l;
		log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c;
	}
	rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
	rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;

	dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
	dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
	dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n",
			__func__,
			full_swath_bytes_packed_l);
	dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n",
			__func__,
			full_swath_bytes_packed_c);
}

// Function: get_meta_and_pte_attr
//  Compute the swath, meta (DCC metadata) and dpte (data page table entry)
//  attributes for one plane of a surface.
// Input:
//  vp_width, vp_height - viewport size for this plane
//  data_pitch, meta_pitch - pitches for this plane
//  source_format, tiling, macro_tile_size, source_scan - passed as unsigned
//      int and cast/compared against the corresponding enums below
//  is_chroma - non-zero selects the chroma block sizes / bytes per element
//  rq_sizing_param->meta_chunk_bytes, min_meta_chunk_bytes - read here
// Output:
//  rq_dlg_param    - swath_width_ub, req_per_swath_ub, meta_* and dpte_* fields
//  rq_misc_param   - full_swath_bytes, blk256_height, blk256_width
//  rq_sizing_param - dpte_group_bytes
//  NOTE(review): several expressions subtract from unsigned values
//  (vp_width - 1, vp_height - 1, meta_surface_bytes - vmpg_bytes) and will
//  wrap if the left operand is smaller - presumably callers never hit this;
//  confirm.
//
static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
		display_data_rq_dlg_params_st *rq_dlg_param,
		display_data_rq_misc_params_st *rq_misc_param,
		display_data_rq_sizing_params_st *rq_sizing_param,
		unsigned int vp_width,
		unsigned int vp_height,
		unsigned int data_pitch,
		unsigned int meta_pitch,
		unsigned int source_format,
		unsigned int tiling,
		unsigned int macro_tile_size,
		unsigned int source_scan,
		unsigned int is_chroma)
{
	bool surf_linear = (tiling == dm_sw_linear);
	bool surf_vert = (source_scan == dm_vert);

	unsigned int bytes_per_element;
	unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format),
			false);
	unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format),
			true);

	unsigned int blk256_width = 0;
	unsigned int blk256_height = 0;

	unsigned int blk256_width_y = 0;
	unsigned int blk256_height_y = 0;
	unsigned int blk256_width_c = 0;
	unsigned int blk256_height_c = 0;
	unsigned int log2_bytes_per_element;
	unsigned int log2_blk256_width;
	unsigned int log2_blk256_height;
	unsigned int blk_bytes;
	unsigned int log2_blk_bytes;
	unsigned int log2_blk_height;
	unsigned int log2_blk_width;
	unsigned int log2_meta_req_bytes;
	unsigned int log2_meta_req_height;
	unsigned int log2_meta_req_width;
	unsigned int meta_req_width;
	unsigned int meta_req_height;
	unsigned int log2_meta_row_height;
	unsigned int meta_row_width_ub;
	unsigned int log2_meta_chunk_bytes;
	unsigned int log2_meta_chunk_height;

	//full sized meta chunk width in unit of data elements
	unsigned int log2_meta_chunk_width;
	unsigned int log2_min_meta_chunk_bytes;
	unsigned int min_meta_chunk_width;
	unsigned int meta_chunk_width;
	unsigned int meta_chunk_per_row_int;
	unsigned int meta_row_remainder;
	unsigned int meta_chunk_threshold;
	unsigned int meta_blk_bytes;
	unsigned int meta_blk_height;
	unsigned int meta_blk_width;
	unsigned int meta_surface_bytes;
	unsigned int vmpg_bytes;
	unsigned int meta_pte_req_per_frame_ub;
	unsigned int meta_pte_bytes_per_frame_ub;
	const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
	// NOTE(review): the luma buffer size is read even when is_chroma is set -
	// confirm this is intended.
	const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
	const unsigned int pde_proc_buffer_size_64k_reqs =
			mode_lib->ip.pde_proc_buffer_size_64k_reqs;

	unsigned int log2_vmpg_height = 0;
	unsigned int log2_vmpg_width = 0;
	unsigned int log2_dpte_req_height_ptes = 0;
	unsigned int log2_dpte_req_height = 0;
	unsigned int log2_dpte_req_width = 0;
	unsigned int log2_dpte_row_height_linear = 0;
	unsigned int log2_dpte_row_height = 0;
	unsigned int log2_dpte_group_width = 0;
	unsigned int dpte_row_width_ub = 0;
	unsigned int dpte_req_height = 0;
	unsigned int dpte_req_width = 0;
	unsigned int dpte_group_width = 0;
	unsigned int log2_dpte_group_bytes = 0;
	unsigned int log2_dpte_group_length = 0;
	unsigned int pde_buf_entries;
	bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10);

	Calculate256BBlockSizes((enum source_format_class)(source_format),
			(enum dm_swizzle_mode)(tiling),
			bytes_per_element_y,
			bytes_per_element_c,
			&blk256_height_y,
			&blk256_height_c,
			&blk256_width_y,
			&blk256_width_c);

	// Select the per-plane values for the plane being processed.
	if (!is_chroma) {
		blk256_width = blk256_width_y;
		blk256_height = blk256_height_y;
		bytes_per_element = bytes_per_element_y;
	} else {
		blk256_width = blk256_width_c;
		blk256_height = blk256_height_c;
		bytes_per_element = bytes_per_element_c;
	}

	log2_bytes_per_element = dml_log2(bytes_per_element);

	dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
	dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
	dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
	dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);

	log2_blk256_width = dml_log2((double) blk256_width);
	log2_blk256_height = dml_log2((double) blk256_height);
	blk_bytes = surf_linear ?
			256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
	log2_blk_bytes = dml_log2((double) blk_bytes);
	log2_blk_height = 0;
	log2_blk_width = 0;

	// remember log rule
	// "+" in log is multiply
	// "-" in log is divide
	// "/2" is like square root
	// blk is vertical biased
	if (tiling != dm_sw_linear)
		log2_blk_height = log2_blk256_height
				+ dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
	else
		log2_blk_height = 0; // blk height of 1

	log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;

	// Swath width upper bound: (vp dimension - 1) rounded up to a multiple of
	// the 256B block dimension, plus one more block.
	if (!surf_vert) {
		rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1)
				+ blk256_width;
		rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width;
	} else {
		rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1)
				+ blk256_height;
		rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height;
	}

	if (!surf_vert)
		rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height
				* bytes_per_element;
	else
		rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width
				* bytes_per_element;

	rq_misc_param->blk256_height = blk256_height;
	rq_misc_param->blk256_width = blk256_width;

	// -------
	// meta
	// -------
	log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element

	// each 64b meta request for dcn is 8x8 meta elements and
	// a meta element covers one 256b block of the data surface.
	log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
	log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
			- log2_meta_req_height;
	meta_req_width = 1 << log2_meta_req_width;
	meta_req_height = 1 << log2_meta_req_height;
	log2_meta_row_height = 0;
	meta_row_width_ub = 0;

	// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
	// calculate upper bound of the meta_row_width
	if (!surf_vert) {
		log2_meta_row_height = log2_meta_req_height;
		meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1)
				+ meta_req_width;
		rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
	} else {
		log2_meta_row_height = log2_meta_req_width;
		meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1)
				+ meta_req_height;
		rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
	}
	rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;

	rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;

	log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
	log2_meta_chunk_height = log2_meta_row_height;

	//full sized meta chunk width in unit of data elements
	log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element
			- log2_meta_chunk_height;
	log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
	min_meta_chunk_width = 1
			<< (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element
					- log2_meta_chunk_height);
	meta_chunk_width = 1 << log2_meta_chunk_width;
	meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
	meta_row_remainder = meta_row_width_ub % meta_chunk_width;
	meta_chunk_threshold = 0;
	meta_blk_bytes = 4096;
	meta_blk_height = blk256_height * 64;
	meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height;
	meta_surface_bytes = meta_pitch
			* (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height)
			* bytes_per_element / 256;
	vmpg_bytes = mode_lib->soc.vmm_page_size_bytes;
	meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes,
			8 * vmpg_bytes,
			1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
	meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
	rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;

	dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
	dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width);
	dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
	dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n",
			__func__,
			meta_pte_req_per_frame_ub);
	dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n",
			__func__,
			meta_pte_bytes_per_frame_ub);

	if (!surf_vert)
		meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
	else
		meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;

	// One extra chunk per row when the remainder is within the threshold,
	// two otherwise.
	if (meta_row_remainder <= meta_chunk_threshold)
		rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
	else
		rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;

	// ------
	// dpte
	// ------
	if (surf_linear) {
		log2_vmpg_height = 0;   // one line high
	} else {
		log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
	}
	log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;

	// only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
	if (surf_linear) { //one 64B PTE request returns 8 PTEs
		log2_dpte_req_height_ptes = 0;
		log2_dpte_req_width = log2_vmpg_width + 3;
		log2_dpte_req_height = 0;
	} else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
		//one 64B req gives 8x1 PTEs for 4KB tile
		log2_dpte_req_height_ptes = 0;
		log2_dpte_req_width = log2_blk_width + 3;
		log2_dpte_req_height = log2_blk_height + 0;
	} else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
		//two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
		log2_dpte_req_height_ptes = 4;
		log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
		log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
	} else { //64KB page size and must 64KB tile block
		//one 64B req gives 8x1 PTEs for 64KB tile
		log2_dpte_req_height_ptes = 0;
		log2_dpte_req_width = log2_blk_width + 3;
		log2_dpte_req_height = log2_blk_height + 0;
	}

	// The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
	// log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
	// That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
	//log2_dpte_req_height    = log2_vmpg_height + log2_dpte_req_height_ptes;
	//log2_dpte_req_width     = log2_vmpg_width + log2_dpte_req_width_ptes;
	dpte_req_height = 1 << log2_dpte_req_height;
	dpte_req_width = 1 << log2_dpte_req_width;

	// calculate pitch dpte row buffer can hold
	// round the result down to a power of two.
	pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs;
	if (surf_linear) {
		unsigned int dpte_row_height;

		log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries
										/ bytes_per_element,
									dpte_buf_in_pte_reqs
										* dpte_req_width)
								/ data_pitch),
						1);

		ASSERT(log2_dpte_row_height_linear >= 3);

		// Clamp the linear row height to 2^7 lines.
		if (log2_dpte_row_height_linear > 7)
			log2_dpte_row_height_linear = 7;

		log2_dpte_row_height = log2_dpte_row_height_linear;
		// For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
		// the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
		dpte_row_height = 1 << log2_dpte_row_height;
		dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1,
				dpte_req_width,
				1) + dpte_req_width;
		rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
	} else {
		// the upper bound of the dpte_row_width without dependency on viewport position follows.
		// for tiled mode, row height is the same as req height and row store up to vp size upper bound
		if (!surf_vert) {
			log2_dpte_row_height = log2_dpte_req_height;
			dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1)
					+ dpte_req_width;
			rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
		} else {
			log2_dpte_row_height =
					(log2_blk_width < log2_dpte_req_width) ?
							log2_blk_width : log2_dpte_req_width;
			dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1)
					+ dpte_req_height;
			rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
		}
	}
	if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
		rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
	else
		rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request

	rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;

	// the dpte_group_bytes is reduced for the specific case of vertical
	// access of a tile surface that has dpte request of 8x1 ptes.
	// NOTE(review): bitwise '&' is used on boolean operands below; every
	// operand is 0 or 1 and has no side effects, so the result matches '&&'.
	if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
		rq_sizing_param->dpte_group_bytes = 512;
	else
		//full size
		rq_sizing_param->dpte_group_bytes = 2048;

	//since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
	log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
	log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests

	// full sized data pte group width in elements
	if (!surf_vert)
		log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
	else
		log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;

	//But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
	if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
		log2_dpte_group_width = log2_dpte_group_width - 1;

	dpte_group_width = 1 << log2_dpte_group_width;

	// since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
	// the upper bound for the dpte groups per row is as follows.
	rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width,
			1);
}

// Function: get_surf_rq_param
//  Gather the per-plane viewport/pitch inputs from pipe_src_param, set the
//  fixed chunk / group sizes in rq_sizing_param, then call
//  get_meta_and_pte_attr for that plane.
// Input:
//  pipe_src_param - pipe source configuration (passed by value)
//  is_chroma      - selects the *_c viewport and pitch fields when true
// Output:
//  rq_sizing_param, rq_dlg_param, rq_misc_param for the selected plane
//
static void get_surf_rq_param(struct display_mode_lib *mode_lib,
		display_data_rq_sizing_params_st *rq_sizing_param,
		display_data_rq_dlg_params_st *rq_dlg_param,
		display_data_rq_misc_params_st *rq_misc_param,
		const display_pipe_source_params_st pipe_src_param,
		bool is_chroma)
{
	// mode_422 is hard-coded to 0 here, so ppe is always 1.
	bool mode_422 = 0;
	unsigned int vp_width = 0;
	unsigned int vp_height = 0;
	unsigned int data_pitch = 0;
	unsigned int meta_pitch = 0;
	unsigned int ppe = mode_422 ? 2 : 1;

	// FIXME check if ppe apply for both luma and chroma in 422 case
	if (is_chroma) {
		vp_width = pipe_src_param.viewport_width_c / ppe;
		vp_height = pipe_src_param.viewport_height_c;
		data_pitch = pipe_src_param.data_pitch_c;
		meta_pitch = pipe_src_param.meta_pitch_c;
	} else {
		vp_width = pipe_src_param.viewport_width / ppe;
		vp_height = pipe_src_param.viewport_height;
		data_pitch = pipe_src_param.data_pitch;
		meta_pitch = pipe_src_param.meta_pitch;
	}

	rq_sizing_param->chunk_bytes = 8192;

	// chunk_bytes was just set to 8192, so the else branch is always taken.
	if (rq_sizing_param->chunk_bytes == 64 * 1024)
		rq_sizing_param->min_chunk_bytes = 0;
	else
		rq_sizing_param->min_chunk_bytes = 1024;

	rq_sizing_param->meta_chunk_bytes = 2048;
	rq_sizing_param->min_meta_chunk_bytes = 256;

	rq_sizing_param->mpte_group_bytes = 2048;

	get_meta_and_pte_attr(mode_lib,
			rq_dlg_param,
			rq_misc_param,
			rq_sizing_param,
			vp_width,
			vp_height,
			data_pitch,
			meta_pitch,
			pipe_src_param.source_format,
			pipe_src_param.sw_mode,
			pipe_src_param.macro_tile_size,
			pipe_src_param.source_scan,
			is_chroma);
}

// Definition of dml20v2_rq_dlg_get_rq_params (declared near the top of this
// file): sets the yuv420 flags, computes the luma parameters, computes the
// chroma parameters only for dual-plane formats, then splits the detile
// buffer and prints the result.
static void dml20v2_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
		display_rq_params_st *rq_param,
		const display_pipe_source_params_st pipe_src_param)
{
	// get param for luma surface
	rq_param->yuv420 = pipe_src_param.source_format == dm_420_8
			|| pipe_src_param.source_format == dm_420_10;
	rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10;

	get_surf_rq_param(mode_lib,
			&(rq_param->sizing.rq_l),
			&(rq_param->dlg.rq_l),
			&(rq_param->misc.rq_l),
			pipe_src_param,
			0);

	if (is_dual_plane((enum source_format_class)(pipe_src_param.source_format))) {
		// get param for chroma surface
		get_surf_rq_param(mode_lib,
				&(rq_param->sizing.rq_c),
				&(rq_param->dlg.rq_c),
				&(rq_param->misc.rq_c),
				pipe_src_param,
				1);
	}

	// calculate how to split the det buffer space between luma and chroma
	handle_det_buf_split(mode_lib, rq_param, pipe_src_param);
	print__rq_params_st(mode_lib, *rq_param);
}

// Function: dml20v2_rq_dlg_get_rq_reg
//  Non-static entry point: zero rq_regs, compute the requestor parameters for
//  pipe_param.src and convert them into register values.
// Input:
//  pipe_param - pipe configuration; only pipe_param.src is read here
// Output:
//  rq_regs    - fully overwritten (memset to 0 first)
//  rq_param is zero-initialized locally, so the chroma fields stay zero for
//  formats that are not dual plane.
//
void dml20v2_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
		display_rq_regs_st *rq_regs,
		const display_pipe_params_st pipe_param)
{
	display_rq_params_st rq_param = {0};

	memset(rq_regs, 0, sizeof(*rq_regs));
	dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param.src);
	extract_rq_regs(mode_lib, rq_regs, rq_param);

	print__rq_regs_st(mode_lib, *rq_regs);
}

// Note: currently taken in as is.
// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma.
++static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, ++ const display_e2e_pipe_params_st *e2e_pipe_param, ++ const unsigned int num_pipes, ++ const unsigned int pipe_idx, ++ display_dlg_regs_st *disp_dlg_regs, ++ display_ttu_regs_st *disp_ttu_regs, ++ const display_rq_dlg_params_st rq_dlg_param, ++ const display_dlg_sys_params_st dlg_sys_param, ++ const bool cstate_en, ++ const bool pstate_en) ++{ ++ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; ++ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; ++ const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; ++ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; ++ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; ++ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; ++ ++ // ------------------------- ++ // Section 1.15.2.1: OTG dependent Params ++ // ------------------------- ++ // Timing ++ unsigned int htotal = dst->htotal; ++// unsigned int hblank_start = dst.hblank_start; // TODO: Remove ++ unsigned int hblank_end = dst->hblank_end; ++ unsigned int vblank_start = dst->vblank_start; ++ unsigned int vblank_end = dst->vblank_end; ++ unsigned int min_vblank = mode_lib->ip.min_vblank_lines; ++ ++ double dppclk_freq_in_mhz = clks->dppclk_mhz; ++ double dispclk_freq_in_mhz = clks->dispclk_mhz; ++ double refclk_freq_in_mhz = clks->refclk_mhz; ++ double pclk_freq_in_mhz = dst->pixel_rate_mhz; ++ bool interlaced = dst->interlaced; ++ ++ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; ++ ++ double min_dcfclk_mhz; ++ double t_calc_us; ++ double min_ttu_vblank; ++ ++ double min_dst_y_ttu_vblank; ++ unsigned int dlg_vblank_start; ++ bool dual_plane; ++ bool mode_422; ++ unsigned int access_dir; ++ unsigned int vp_height_l; ++ unsigned int vp_width_l; ++ unsigned int vp_height_c; ++ unsigned int vp_width_c; ++ ++ // Scaling ++ 
unsigned int htaps_l; ++ unsigned int htaps_c; ++ double hratio_l; ++ double hratio_c; ++ double vratio_l; ++ double vratio_c; ++ bool scl_enable; ++ ++ double line_time_in_us; ++ // double vinit_l; ++ // double vinit_c; ++ // double vinit_bot_l; ++ // double vinit_bot_c; ++ ++ // unsigned int swath_height_l; ++ unsigned int swath_width_ub_l; ++ // unsigned int dpte_bytes_per_row_ub_l; ++ unsigned int dpte_groups_per_row_ub_l; ++ // unsigned int meta_pte_bytes_per_frame_ub_l; ++ // unsigned int meta_bytes_per_row_ub_l; ++ ++ // unsigned int swath_height_c; ++ unsigned int swath_width_ub_c; ++ // unsigned int dpte_bytes_per_row_ub_c; ++ unsigned int dpte_groups_per_row_ub_c; ++ ++ unsigned int meta_chunks_per_row_ub_l; ++ unsigned int meta_chunks_per_row_ub_c; ++ unsigned int vupdate_offset; ++ unsigned int vupdate_width; ++ unsigned int vready_offset; ++ ++ unsigned int dppclk_delay_subtotal; ++ unsigned int dispclk_delay_subtotal; ++ unsigned int pixel_rate_delay_subtotal; ++ ++ unsigned int vstartup_start; ++ unsigned int dst_x_after_scaler; ++ unsigned int dst_y_after_scaler; ++ double line_wait; ++ double dst_y_prefetch; ++ double dst_y_per_vm_vblank; ++ double dst_y_per_row_vblank; ++ double dst_y_per_vm_flip; ++ double dst_y_per_row_flip; ++ double min_dst_y_per_vm_vblank; ++ double min_dst_y_per_row_vblank; ++ double lsw; ++ double vratio_pre_l; ++ double vratio_pre_c; ++ unsigned int req_per_swath_ub_l; ++ unsigned int req_per_swath_ub_c; ++ unsigned int meta_row_height_l; ++ unsigned int meta_row_height_c; ++ unsigned int swath_width_pixels_ub_l; ++ unsigned int swath_width_pixels_ub_c; ++ unsigned int scaler_rec_in_width_l; ++ unsigned int scaler_rec_in_width_c; ++ unsigned int dpte_row_height_l; ++ unsigned int dpte_row_height_c; ++ double hscale_pixel_rate_l; ++ double hscale_pixel_rate_c; ++ double min_hratio_fact_l; ++ double min_hratio_fact_c; ++ double refcyc_per_line_delivery_pre_l; ++ double refcyc_per_line_delivery_pre_c; ++ double 
refcyc_per_line_delivery_l; ++ double refcyc_per_line_delivery_c; ++ ++ double refcyc_per_req_delivery_pre_l; ++ double refcyc_per_req_delivery_pre_c; ++ double refcyc_per_req_delivery_l; ++ double refcyc_per_req_delivery_c; ++ ++ unsigned int full_recout_width; ++ double xfc_transfer_delay; ++ double xfc_precharge_delay; ++ double xfc_remote_surface_flip_latency; ++ double xfc_dst_y_delta_drq_limit; ++ double xfc_prefetch_margin; ++ double refcyc_per_req_delivery_pre_cur0; ++ double refcyc_per_req_delivery_cur0; ++ double refcyc_per_req_delivery_pre_cur1; ++ double refcyc_per_req_delivery_cur1; ++ ++ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); ++ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); ++ ++ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); ++ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); ++ ++ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ++ ASSERT(ref_freq_to_pix_freq < 4.0); ++ ++ disp_dlg_regs->ref_freq_to_pix_freq = ++ (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); ++ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal ++ * dml_pow(2, 8)); ++ disp_dlg_regs->dlg_vblank_end = interlaced ? 
(vblank_end / 2) : vblank_end; // 15 bits ++ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end ++ * (double) ref_freq_to_pix_freq); ++ ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); ++ ++ min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; ++ t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); ++ min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; ++ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; ++ ++ disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start ++ + min_dst_y_ttu_vblank) * dml_pow(2, 2)); ++ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); ++ ++ dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", ++ __func__, ++ min_dcfclk_mhz); ++ dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n", ++ __func__, ++ min_ttu_vblank); ++ dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", ++ __func__, ++ min_dst_y_ttu_vblank); ++ dml_print("DML_DLG: %s: t_calc_us = %3.2f\n", ++ __func__, ++ t_calc_us); ++ dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", ++ __func__, ++ disp_dlg_regs->min_dst_y_next_start); ++ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", ++ __func__, ++ ref_freq_to_pix_freq); ++ ++ // ------------------------- ++ // Section 1.15.2.2: Prefetch, Active and TTU ++ // ------------------------- ++ // Prefetch Calc ++ // Source ++// dcc_en = src.dcc; ++ dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); ++ mode_422 = 0; // FIXME ++ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed ++// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); ++// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); ++ vp_height_l = src->viewport_height; ++ 
vp_width_l = src->viewport_width; ++ vp_height_c = src->viewport_height_c; ++ vp_width_c = src->viewport_width_c; ++ ++ // Scaling ++ htaps_l = taps->htaps; ++ htaps_c = taps->htaps_c; ++ hratio_l = scl->hscl_ratio; ++ hratio_c = scl->hscl_ratio_c; ++ vratio_l = scl->vscl_ratio; ++ vratio_c = scl->vscl_ratio_c; ++ scl_enable = scl->scl_enable; ++ ++ line_time_in_us = (htotal / pclk_freq_in_mhz); ++// vinit_l = scl.vinit; ++// vinit_c = scl.vinit_c; ++// vinit_bot_l = scl.vinit_bot; ++// vinit_bot_c = scl.vinit_bot_c; ++ ++// unsigned int swath_height_l = rq_dlg_param.rq_l.swath_height; ++ swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; ++// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub; ++ dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; ++// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub; ++// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub; ++ ++// unsigned int swath_height_c = rq_dlg_param.rq_c.swath_height; ++ swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; ++ // dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub; ++ dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; ++ ++ meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; ++ meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; ++ vupdate_offset = dst->vupdate_offset; ++ vupdate_width = dst->vupdate_width; ++ vready_offset = dst->vready_offset; ++ ++ dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; ++ dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; ++ ++ if (scl_enable) ++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; ++ else ++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; ++ ++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter ++ + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; ++ ++ if (dout->dsc_enable) { ++ double dsc_delay = 
get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ dispclk_delay_subtotal += dsc_delay; ++ } ++ ++ pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz ++ + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; ++ ++ vstartup_start = dst->vstartup_start; ++ if (interlaced) { ++ if (vstartup_start / 2.0 ++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal ++ <= vblank_end / 2.0) ++ disp_dlg_regs->vready_after_vcount0 = 1; ++ else ++ disp_dlg_regs->vready_after_vcount0 = 0; ++ } else { ++ if (vstartup_start ++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal ++ <= vblank_end) ++ disp_dlg_regs->vready_after_vcount0 = 1; ++ else ++ disp_dlg_regs->vready_after_vcount0 = 0; ++ } ++ ++ // TODO: Where is this coming from? ++ if (interlaced) ++ vstartup_start = vstartup_start / 2; ++ ++ // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? ++ if (vstartup_start >= min_vblank) { ++ dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", ++ __func__, ++ vblank_start, ++ vblank_end); ++ dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", ++ __func__, ++ vstartup_start, ++ min_vblank); ++ min_vblank = vstartup_start + 1; ++ dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", ++ __func__, ++ vstartup_start, ++ min_vblank); ++ } ++ ++ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); ++ dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", ++ __func__, ++ pixel_rate_delay_subtotal); ++ dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", ++ __func__, ++ dst_x_after_scaler); ++ dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", ++ __func__, ++ 
dst_y_after_scaler); ++ ++ // Lwait ++ line_wait = mode_lib->soc.urgent_latency_us; ++ if (cstate_en) ++ line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); ++ if (pstate_en) ++ line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us ++ + mode_lib->soc.urgent_latency_us, ++ line_wait); ++ line_wait = line_wait / line_time_in_us; ++ ++ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); ++ ++ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ min_dst_y_per_vm_vblank = 8.0; ++ min_dst_y_per_row_vblank = 16.0; ++ ++ // magic! 
++ if (htotal <= 75) { ++ min_vblank = 300; ++ min_dst_y_per_vm_vblank = 100.0; ++ min_dst_y_per_row_vblank = 100.0; ++ } ++ ++ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); ++ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); ++ ++ ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); ++ ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); ++ ++ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); ++ lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); ++ ++ dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); ++ ++ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); ++ dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); ++ ++ // Active ++ req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; ++ req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; ++ meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; ++ meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; ++ swath_width_pixels_ub_l = 0; ++ swath_width_pixels_ub_c = 0; ++ scaler_rec_in_width_l = 0; ++ scaler_rec_in_width_c = 0; ++ dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; ++ dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; ++ ++ if (mode_422) { ++ swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element ++ swath_width_pixels_ub_c = swath_width_ub_c * 2; ++ } else { ++ swath_width_pixels_ub_l = swath_width_ub_l * 1; ++ swath_width_pixels_ub_c = swath_width_ub_c * 1; ++ } ++ ++ hscale_pixel_rate_l = 0.; ++ hscale_pixel_rate_c = 0.; ++ min_hratio_fact_l = 1.0; ++ min_hratio_fact_c = 1.0; ++ ++ if (htaps_l <= 1) ++ min_hratio_fact_l = 2.0; ++ else if (htaps_l <= 6) { ++ if ((hratio_l * 2.0) > 4.0) ++ min_hratio_fact_l = 4.0; ++ 
else ++ min_hratio_fact_l = hratio_l * 2.0; ++ } else { ++ if (hratio_l > 4.0) ++ min_hratio_fact_l = 4.0; ++ else ++ min_hratio_fact_l = hratio_l; ++ } ++ ++ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; ++ ++ if (htaps_c <= 1) ++ min_hratio_fact_c = 2.0; ++ else if (htaps_c <= 6) { ++ if ((hratio_c * 2.0) > 4.0) ++ min_hratio_fact_c = 4.0; ++ else ++ min_hratio_fact_c = hratio_c * 2.0; ++ } else { ++ if (hratio_c > 4.0) ++ min_hratio_fact_c = 4.0; ++ else ++ min_hratio_fact_c = hratio_c; ++ } ++ ++ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; ++ ++ refcyc_per_line_delivery_pre_l = 0.; ++ refcyc_per_line_delivery_pre_c = 0.; ++ refcyc_per_line_delivery_l = 0.; ++ refcyc_per_line_delivery_c = 0.; ++ ++ refcyc_per_req_delivery_pre_l = 0.; ++ refcyc_per_req_delivery_pre_c = 0.; ++ refcyc_per_req_delivery_l = 0.; ++ refcyc_per_req_delivery_c = 0.; ++ ++ full_recout_width = 0; ++ // In ODM ++ if (src->is_hsplit) { ++ // This "hack" is only allowed (and valid) for MPC combine. 
In ODM ++ // combine, you MUST specify the full_recout_width...according to Oswin ++ if (dst->full_recout_width == 0 && !dst->odm_combine) { ++ dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", ++ __func__); ++ full_recout_width = dst->recout_width * 2; // assume half split for dcn1 ++ } else ++ full_recout_width = dst->full_recout_width; ++ } else ++ full_recout_width = dst->recout_width; ++ ++ // As of DCN2, mpc_combine and odm_combine are mutually exclusive ++ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_l, ++ hscale_pixel_rate_l, ++ swath_width_pixels_ub_l, ++ 1); // per line ++ ++ refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_l, ++ hscale_pixel_rate_l, ++ swath_width_pixels_ub_l, ++ 1); // per line ++ ++ dml_print("DML_DLG: %s: full_recout_width = %d\n", ++ __func__, ++ full_recout_width); ++ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", ++ __func__, ++ hscale_pixel_rate_l); ++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_pre_l); ++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_l); ++ ++ if (dual_plane) { ++ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_c, ++ hscale_pixel_rate_c, ++ swath_width_pixels_ub_c, ++ 1); // per line ++ ++ refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_c, ++ hscale_pixel_rate_c, ++ swath_width_pixels_ub_c, ++ 1); // per line ++ ++ 
dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_pre_c); ++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_c); ++ } ++ ++ // TTU - Luma / Chroma ++ if (access_dir) { // vertical access ++ scaler_rec_in_width_l = vp_height_l; ++ scaler_rec_in_width_c = vp_height_c; ++ } else { ++ scaler_rec_in_width_l = vp_width_l; ++ scaler_rec_in_width_c = vp_width_c; ++ } ++ ++ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_l, ++ hscale_pixel_rate_l, ++ scaler_rec_in_width_l, ++ req_per_swath_ub_l); // per req ++ refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_l, ++ hscale_pixel_rate_l, ++ scaler_rec_in_width_l, ++ req_per_swath_ub_l); // per req ++ ++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_pre_l); ++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_l); ++ ++ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ++ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); ++ ++ if (dual_plane) { ++ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_c, ++ hscale_pixel_rate_c, ++ scaler_rec_in_width_c, ++ req_per_swath_ub_c); // per req ++ refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_c, ++ hscale_pixel_rate_c, ++ scaler_rec_in_width_c, ++ req_per_swath_ub_c); // per req ++ ++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = 
%3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_pre_c); ++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_c); ++ ++ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ++ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); ++ } ++ ++ // XFC ++ xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ xfc_precharge_delay = get_xfc_precharge_delay(mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; ++ xfc_prefetch_margin = get_xfc_prefetch_margin(mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ ++ // TTU - Cursor ++ refcyc_per_req_delivery_pre_cur0 = 0.0; ++ refcyc_per_req_delivery_cur0 = 0.0; ++ if (src->num_cursors > 0) { ++ calculate_ttu_cursor(mode_lib, ++ &refcyc_per_req_delivery_pre_cur0, ++ &refcyc_per_req_delivery_cur0, ++ refclk_freq_in_mhz, ++ ref_freq_to_pix_freq, ++ hscale_pixel_rate_l, ++ scl->hscl_ratio, ++ vratio_pre_l, ++ vratio_l, ++ src->cur0_src_width, ++ (enum cursor_bpp)(src->cur0_bpp)); ++ } ++ ++ refcyc_per_req_delivery_pre_cur1 = 0.0; ++ refcyc_per_req_delivery_cur1 = 0.0; ++ if (src->num_cursors > 1) { ++ calculate_ttu_cursor(mode_lib, ++ &refcyc_per_req_delivery_pre_cur1, ++ &refcyc_per_req_delivery_cur1, ++ refclk_freq_in_mhz, ++ ref_freq_to_pix_freq, ++ hscale_pixel_rate_l, ++ scl->hscl_ratio, ++ vratio_pre_l, ++ vratio_l, ++ src->cur1_src_width, ++ (enum cursor_bpp)(src->cur1_bpp)); ++ } ++ ++ // TTU - Misc ++ // all hard-coded ++ ++ // Assignment to register structures ++ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line ++ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk ++ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); ++ 
disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); ++ ++ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); ++ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); ++ ++ disp_dlg_regs->refcyc_per_pte_group_vblank_l = ++ (unsigned int) (dst_y_per_row_vblank * (double) htotal ++ * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); ++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); ++ ++ if (dual_plane) { ++ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank ++ * (double) htotal * ref_freq_to_pix_freq ++ / (double) dpte_groups_per_row_ub_c); ++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c ++ < (unsigned int) dml_pow(2, 13)); ++ } ++ ++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = ++ (unsigned int) (dst_y_per_row_vblank * (double) htotal ++ * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); ++ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); ++ ++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = ++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. 
assigned to be the same as _l for now ++ ++ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal ++ * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; ++ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal ++ * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; ++ ++ if (dual_plane) { ++ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip ++ * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; ++ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip ++ * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; ++ } ++ ++ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l ++ / (double) vratio_l * dml_pow(2, 2)); ++ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); ++ ++ if (dual_plane) { ++ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c ++ / (double) vratio_c * dml_pow(2, 2)); ++ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { ++ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", ++ __func__, ++ disp_dlg_regs->dst_y_per_pte_row_nom_c, ++ (unsigned int) dml_pow(2, 17) - 1); ++ } ++ } ++ ++ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l ++ / (double) vratio_l * dml_pow(2, 2)); ++ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); ++ ++ disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. 
assigned to be the same as _l for now ++ ++ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l ++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq ++ / (double) dpte_groups_per_row_ub_l); ++ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; ++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l ++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq ++ / (double) meta_chunks_per_row_ub_l); ++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; ++ ++ if (dual_plane) { ++ disp_dlg_regs->refcyc_per_pte_group_nom_c = ++ (unsigned int) ((double) dpte_row_height_c / (double) vratio_c ++ * (double) htotal * ref_freq_to_pix_freq ++ / (double) dpte_groups_per_row_ub_c); ++ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; ++ ++ // TODO: Is this the right calculation? Does htotal need to be halved? 
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = ++ (unsigned int) ((double) meta_row_height_c / (double) vratio_c ++ * (double) htotal * ref_freq_to_pix_freq ++ / (double) meta_chunks_per_row_ub_c); ++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; ++ } ++ ++ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, ++ 1); ++ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, ++ 1); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); ++ ++ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, ++ 1); ++ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, ++ 1); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); ++ ++ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; ++ disp_dlg_regs->dst_y_offset_cur0 = 0; ++ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; ++ disp_dlg_regs->dst_y_offset_cur1 = 0; ++ ++ disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; ++ disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; ++ disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; ++ disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(xfc_prefetch_margin * refclk_freq_in_mhz, ++ 1); ++ ++ // slave has to have this value also set to off ++ if (src->xfc_enable && !src->xfc_slave) ++ disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); ++ else ++ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off ++ ++ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l 
++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = ++ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = ++ (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 ++ * dml_pow(2, 10)); ++ disp_ttu_regs->qos_level_low_wm = 0; ++ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); ++ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal ++ * ref_freq_to_pix_freq); ++ /*ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));*/ ++ ++ disp_ttu_regs->qos_level_flip = 14; ++ disp_ttu_regs->qos_level_fixed_l = 8; ++ disp_ttu_regs->qos_level_fixed_c = 8; ++ disp_ttu_regs->qos_level_fixed_cur0 = 8; ++ disp_ttu_regs->qos_ramp_disable_l = 0; ++ disp_ttu_regs->qos_ramp_disable_c = 0; ++ disp_ttu_regs->qos_ramp_disable_cur0 = 0; ++ ++ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; ++ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); ++ ++ print__ttu_regs_st(mode_lib, *disp_ttu_regs); ++ print__dlg_regs_st(mode_lib, *disp_dlg_regs); ++} ++/* Builds the system-level DLG inputs (watermarks, deep-sleep DCFCLK, immediate-flip totals) with the get_*() helpers, derives the RQ params of e2e_pipe_param[pipe_idx], and passes both to dml20v2_rq_dlg_get_dlg_params() to fill *dlg_regs and *ttu_regs. vm_en, ignore_viewport_pos and immediate_flip_support are not referenced in the body. */ ++void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, ++ display_dlg_regs_st *dlg_regs, ++ display_ttu_regs_st *ttu_regs, ++ display_e2e_pipe_params_st *e2e_pipe_param, ++ const unsigned int num_pipes, ++ const unsigned int pipe_idx, ++ const bool cstate_en, ++ const bool pstate_en, ++ const bool vm_en, ++ const bool
ignore_viewport_pos, ++ const bool immediate_flip_support) ++{ ++ display_rq_params_st rq_param = {0}; ++ display_dlg_sys_params_st dlg_sys_param = {0}; ++ ++ // Get watermark and Tex. ++ dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, ++ e2e_pipe_param, ++ num_pipes); ++ dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, ++ e2e_pipe_param, ++ num_pipes); ++ dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, ++ e2e_pipe_param, ++ num_pipes); ++ dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency ++ / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated ++ ++ print__dlg_sys_params_st(mode_lib, dlg_sys_param); ++ ++ // system parameter calculation done ++ ++ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); ++ dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe.src); ++ dml20v2_rq_dlg_get_dlg_params(mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx, ++ dlg_regs, ++ ttu_regs, ++ rq_param.dlg, ++ dlg_sys_param, ++ cstate_en, ++ pstate_en); ++ dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); ++} ++/* Computes the prefetch (*refcyc_per_req_delivery_pre_cur) and active (*refcyc_per_req_delivery_cur) request delivery values for one cursor; both are left at 0.0 when cur_width is 0. The request size is 64 bytes for dm_cur_2bit, otherwise (handled as 32bpp) 64, 128 or 256 for widths of 1-16, 17-31 or 32 and above. */ ++static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, ++ double *refcyc_per_req_delivery_pre_cur, ++ double *refcyc_per_req_delivery_cur, ++ double refclk_freq_in_mhz, ++ double ref_freq_to_pix_freq, ++ double hscale_pixel_rate_l, ++ double hscl_ratio, ++ double vratio_pre_l, ++ double vratio_l, ++ unsigned int cur_width, ++ enum cursor_bpp cur_bpp) ++{ ++ unsigned int
cur_src_width = cur_width; ++ unsigned int cur_req_size = 0; ++ unsigned int cur_req_width = 0; ++ double cur_width_ub = 0.0; ++ double cur_req_per_width = 0.0; ++ double hactive_cur = 0.0; ++ ++ ASSERT(cur_src_width <= 256); ++ ++ *refcyc_per_req_delivery_pre_cur = 0.0; ++ *refcyc_per_req_delivery_cur = 0.0; ++ if (cur_src_width > 0) { ++ unsigned int cur_bit_per_pixel = 0; ++ ++ if (cur_bpp == dm_cur_2bit) { ++ cur_req_size = 64; // byte ++ cur_bit_per_pixel = 2; ++ } else { // 32bit ++ cur_bit_per_pixel = 32; ++ if (cur_src_width >= 1 && cur_src_width <= 16) ++ cur_req_size = 64; ++ else if (cur_src_width >= 17 && cur_src_width <= 31) ++ cur_req_size = 128; ++ else ++ cur_req_size = 256; ++ } ++ ++ cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); ++ cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) ++ * (double) cur_req_width; ++ cur_req_per_width = cur_width_ub / (double) cur_req_width; ++ hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor ++ ++ if (vratio_pre_l <= 1.0) { ++ *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq ++ / (double) cur_req_per_width; ++ } else { ++ *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz ++ * (double) cur_src_width / hscale_pixel_rate_l ++ / (double) cur_req_per_width; ++ } ++ ++ ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); ++ ++ if (vratio_l <= 1.0) { ++ *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq ++ / (double) cur_req_per_width; ++ } else { ++ *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz ++ * (double) cur_src_width / hscale_pixel_rate_l ++ / (double) cur_req_per_width; ++ } ++ ++ dml_print("DML_DLG: %s: cur_req_width = %d\n", ++ __func__, ++ cur_req_width); ++ dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", ++ __func__, ++ cur_width_ub); ++ dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", ++ __func__, ++ cur_req_per_width); ++ 
dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", ++ __func__, ++ hactive_cur); ++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", ++ __func__, ++ *refcyc_per_req_delivery_pre_cur); ++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", ++ __func__, ++ *refcyc_per_req_delivery_cur); ++ ++ ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); ++ } ++} +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h +new file mode 100644 +index 000000000000..0378406bf7e7 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h +@@ -0,0 +1,74 @@ ++/* ++ * Copyright 2018 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: AMD ++ * ++ */ ++ ++#ifndef __DML20V2_DISPLAY_RQ_DLG_CALC_H__ ++#define __DML20V2_DISPLAY_RQ_DLG_CALC_H__ ++ ++#include "../dml_common_defs.h" ++#include "../display_rq_dlg_helpers.h" ++ ++struct display_mode_lib; ++ ++ ++// Function: dml20v2_rq_dlg_get_rq_reg ++// Main entry point for test to get the register values out of this DML class. ++// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate ++// and then populate the rq_regs struct ++// Input: ++// pipe_param - pipe source configuration (e.g. vp, pitch, etc.) ++// Output: ++// rq_regs - struct that holds all the RQ register field values. ++// See also: <display_rq_regs_st> ++void dml20v2_rq_dlg_get_rq_reg( ++ struct display_mode_lib *mode_lib, ++ display_rq_regs_st *rq_regs, ++ const display_pipe_params_st pipe_param); ++ ++ ++// Function: dml20v2_rq_dlg_get_dlg_reg ++// Calculate and return DLG and TTU register struct given the system setting ++// Output: ++// dlg_regs - output DLG register struct ++// ttu_regs - output DLG TTU register struct ++// Input: ++// e2e_pipe_param - "compacted" array of e2e pipe param struct ++// num_pipes - num of active "pipe" or "route" ++// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg ++// cstate_en - 0: when calculating min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. ++// Added for legacy or unrealistic timing tests. 
++void dml20v2_rq_dlg_get_dlg_reg( ++ struct display_mode_lib *mode_lib, ++ display_dlg_regs_st *dlg_regs, ++ display_ttu_regs_st *ttu_regs, ++ display_e2e_pipe_params_st *e2e_pipe_param, ++ const unsigned int num_pipes, ++ const unsigned int pipe_idx, ++ const bool cstate_en, ++ const bool pstate_en, ++ const bool vm_en, ++ const bool ignore_viewport_pos, ++ const bool immediate_flip_support); ++ ++#endif +diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +index 91810c7d5cf5..96dfcd8c36bc 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +@@ -28,6 +28,8 @@ + #if defined(CONFIG_DRM_AMD_DC_DCN2_0) + #include "dcn20/display_mode_vba_20.h" + #include "dcn20/display_rq_dlg_calc_20.h" ++#include "dcn20/display_mode_vba_20v2.h" ++#include "dcn20/display_rq_dlg_calc_20v2.h" + #endif + + #if defined(CONFIG_DRM_AMD_DC_DCN2_0) +@@ -37,6 +39,13 @@ const struct dml_funcs dml20_funcs = { + .rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg + }; ++ ++const struct dml_funcs dml20v2_funcs = { ++ .validate = dml20v2_ModeSupportAndSystemConfigurationFull, ++ .recalculate = dml20v2_recalculate, ++ .rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg, ++ .rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg ++}; + #endif + + void dml_init_instance(struct display_mode_lib *lib, +@@ -52,6 +61,9 @@ void dml_init_instance(struct display_mode_lib *lib, + case DML_PROJECT_NAVI10: + lib->funcs = dml20_funcs; + break; ++ case DML_PROJECT_NAVI10v2: ++ lib->funcs = dml20v2_funcs; ++ break; + #endif + default: + break; +diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +index 5bf13d67f289..870716e3c132 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h ++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +@@ -36,6 
+36,7 @@ enum dml_project { + DML_PROJECT_RAVEN1, + #ifdef CONFIG_DRM_AMD_DC_DCN2_0 + DML_PROJECT_NAVI10, ++ DML_PROJECT_NAVI10v2, + #endif + }; + +diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +index 5678472546ab..ab34fd26702f 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h ++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +@@ -100,6 +100,7 @@ struct _vcs_dpi_soc_bounding_box_st { + unsigned int vmm_page_size_bytes; + unsigned int hostvm_min_page_size_bytes; + double dram_clock_change_latency_us; ++ double dummy_pstate_latency_us; + double writeback_dram_clock_change_latency_us; + unsigned int return_bus_width_bytes; + unsigned int voltage_override; +diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +index 4d2a1262d9db..88e63f16f7fc 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +@@ -568,6 +568,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) + if (src->is_hsplit) { + for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { + display_pipe_source_params_st *src_k = &pipes[k].pipe.src; ++ display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest; + + if (src_k->is_hsplit && !visited[k] + && src->hsplit_grp == src_k->hsplit_grp) { +@@ -575,12 +576,15 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) + mode_lib->vba.NumberOfActivePlanes; + mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++; + if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] +- == dm_horz) ++ == dm_horz) { + mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_width; +- else ++ mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] += ++ dst_k->recout_width; ++ } else { + 
mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_height; ++ } + + visited[k] = true; + } +-- +2.17.1 + |