aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch8131
1 files changed, 8131 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch
new file mode 100644
index 00000000..ed24a880
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch
@@ -0,0 +1,8131 @@
+From 847b6b99af63802da2d600fe136ba893f75b0288 Mon Sep 17 00:00:00 2001
+From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
+Date: Fri, 26 Jul 2019 16:52:06 -0400
+Subject: [PATCH 3642/4256] drm/amd/display: Add Renoir DML
+
+DML provides the display configuration validation as provided
+by the hw teams.
+
+Acked-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 +
+ .../dc/dml/dcn21/display_mode_vba_21.c | 6123 +++++++++++++++++
+ .../dc/dml/dcn21/display_mode_vba_21.h | 32 +
+ .../dc/dml/dcn21/display_rq_dlg_calc_21.c | 1823 +++++
+ .../dc/dml/dcn21/display_rq_dlg_calc_21.h | 73 +
+ .../drm/amd/display/dc/dml/display_mode_lib.h | 3 +
+ 6 files changed, 8058 insertions(+)
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+index 95fd2beca80c..b267c0fc64e7 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+@@ -45,6 +45,10 @@ CFLAGS_display_rq_dlg_calc_20.o := $(dml_ccflags)
+ CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags)
+ CFLAGS_display_rq_dlg_calc_20v2.o := $(dml_ccflags)
+ endif
++ifdef CONFIG_DRM_AMD_DC_DCN2_1
++CFLAGS_display_mode_vba_21.o := $(dml_ccflags)
++CFLAGS_display_rq_dlg_calc_21.o := $(dml_ccflags)
++endif
+ ifdef CONFIG_DRM_AMD_DCN3AG
+ CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags)
+ endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+new file mode 100644
+index 000000000000..456cd0e3289c
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+@@ -0,0 +1,6123 @@
++/*
++ * Copyright 2017 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifdef CONFIG_DRM_AMD_DC_DCN2_0
++
++#include "../display_mode_lib.h"
++#include "../dml_inline_defs.h"
++#include "../display_mode_vba.h"
++#include "display_mode_vba_21.h"
++
++
++/*
++ * NOTE:
++ * This file is gcc-parsable HW gospel, coming straight from HW engineers.
++ *
++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
++ * ways. Unless there is something clearly wrong with it the code should
++ * remain as-is as it provides us with a guarantee from HW that it is correct.
++ */
++
++typedef unsigned int uint;
++
++typedef struct { /* per-pipe inputs handed to CalculatePrefetchSchedule() as myPipe */
++ double DPPCLK;
++ double DISPCLK; /* CalculatePrefetchSchedule() returns true early if DPPCLK or DISPCLK is 0.0 */
++ double PixelClock;
++ double DCFCLKDeepSleep;
++ unsigned int DPPPerPlane;
++ bool ScalerEnabled;
++ enum scan_direction_class SourceScan;
++ unsigned int BlockWidth256BytesY;
++ unsigned int BlockHeight256BytesY;
++ unsigned int BlockWidth256BytesC;
++ unsigned int BlockHeight256BytesC;
++ unsigned int InterlaceEnable;
++ unsigned int NumberOfCursors;
++ unsigned int VBlank;
++ unsigned int HTotal;
++} Pipe;
++
++typedef struct { /* host VM settings handed to CalculatePrefetchSchedule() as myHostVM */
++ bool Enable;
++ unsigned int MaxPageTableLevels;
++ unsigned int CachedPageTableLevels; /* MaxPageTableLevels - CachedPageTableLevels gives HostVMDynamicLevels there */
++} HostVM;
++
++#define BPP_INVALID 0
++#define BPP_BLENDED_PIPE 0xffffffff
++
++static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
++static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
++ struct display_mode_lib *mode_lib);
++static unsigned int dscceComputeDelay(
++ unsigned int bpc,
++ double bpp,
++ unsigned int sliceWidth,
++ unsigned int numSlices,
++ enum output_format_class pixelFormat);
++static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
++// Super monster function with over 60 arguments
++static bool CalculatePrefetchSchedule(
++ struct display_mode_lib *mode_lib,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ Pipe *myPipe,
++ unsigned int DSCDelay,
++ double DPPCLKDelaySubtotal,
++ double DPPCLKDelaySCL,
++ double DPPCLKDelaySCLLBOnly,
++ double DPPCLKDelayCNVCFormater,
++ double DPPCLKDelayCNVCCursor,
++ double DISPCLKDelaySubtotal,
++ unsigned int ScalerRecoutWidth,
++ enum output_format_class OutputFormat,
++ unsigned int MaxInterDCNTileRepeaters,
++ unsigned int VStartup,
++ unsigned int MaxVStartup,
++ unsigned int GPUVMPageTableLevels,
++ bool GPUVMEnable,
++ HostVM *myHostVM,
++ bool DynamicMetadataEnable,
++ int DynamicMetadataLinesBeforeActiveRequired,
++ unsigned int DynamicMetadataTransmittedBytes,
++ bool DCCEnable,
++ double UrgentLatency,
++ double UrgentExtraLatency,
++ double TCalc,
++ unsigned int PDEAndMetaPTEBytesFrame,
++ unsigned int MetaRowByte,
++ unsigned int PixelPTEBytesPerRow,
++ double PrefetchSourceLinesY,
++ unsigned int SwathWidthY,
++ double BytePerPixelDETY,
++ double VInitPreFillY,
++ unsigned int MaxNumSwathY,
++ double PrefetchSourceLinesC,
++ double BytePerPixelDETC,
++ double VInitPreFillC,
++ unsigned int MaxNumSwathC,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ double TWait,
++ bool XFCEnabled,
++ double XFCRemoteSurfaceFlipDelay,
++ bool ProgressiveToInterlaceUnitInOPP,
++ double *DSTXAfterScaler,
++ double *DSTYAfterScaler,
++ double *DestinationLinesForPrefetch,
++ double *PrefetchBandwidth,
++ double *DestinationLinesToRequestVMInVBlank,
++ double *DestinationLinesToRequestRowInVBlank,
++ double *VRatioPrefetchY,
++ double *VRatioPrefetchC,
++ double *RequiredPrefetchPixDataBWLuma,
++ double *RequiredPrefetchPixDataBWChroma,
++ unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
++ double *Tno_bw,
++ double *prefetch_vmrow_bw,
++ unsigned int *swath_width_luma_ub,
++ unsigned int *swath_width_chroma_ub,
++ unsigned int *VUpdateOffsetPix,
++ double *VUpdateWidthPix,
++ double *VReadyOffsetPix);
++static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
++static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
++static double CalculateDCCConfiguration(
++ bool DCCEnabled,
++ bool DCCProgrammingAssumesScanDirectionUnknown,
++ unsigned int ViewportWidth,
++ unsigned int ViewportHeight,
++ double DETBufferSize,
++ unsigned int RequestHeight256Byte,
++ unsigned int SwathHeight,
++ enum dm_swizzle_mode TilingFormat,
++ unsigned int BytePerPixel,
++ enum scan_direction_class ScanOrientation,
++ unsigned int *MaxUncompressedBlock,
++ unsigned int *MaxCompressedBlock,
++ unsigned int *Independent64ByteBlock);
++static double CalculatePrefetchSourceLines(
++ struct display_mode_lib *mode_lib,
++ double VRatio,
++ double vtaps,
++ bool Interlace,
++ bool ProgressiveToInterlaceUnitInOPP,
++ unsigned int SwathHeight,
++ unsigned int ViewportYStart,
++ double *VInitPreFill,
++ unsigned int *MaxNumSwath);
++static unsigned int CalculateVMAndRowBytes(
++ struct display_mode_lib *mode_lib,
++ bool DCCEnable,
++ unsigned int BlockHeight256Bytes,
++ unsigned int BlockWidth256Bytes,
++ enum source_format_class SourcePixelFormat,
++ unsigned int SurfaceTiling,
++ unsigned int BytePerPixel,
++ enum scan_direction_class ScanDirection,
++ unsigned int ViewportWidth,
++ unsigned int ViewportHeight,
++ unsigned int SwathWidthY,
++ bool GPUVMEnable,
++ bool HostVMEnable,
++ unsigned int HostVMMaxPageTableLevels,
++ unsigned int HostVMCachedPageTableLevels,
++ unsigned int VMMPageSize,
++ unsigned int PTEBufferSizeInRequests,
++ unsigned int Pitch,
++ unsigned int DCCMetaPitch,
++ unsigned int *MacroTileWidth,
++ unsigned int *MetaRowByte,
++ unsigned int *PixelPTEBytesPerRow,
++ bool *PTEBufferSizeNotExceeded,
++ unsigned int *dpte_row_width_ub,
++ unsigned int *dpte_row_height,
++ unsigned int *MetaRequestWidth,
++ unsigned int *MetaRequestHeight,
++ unsigned int *meta_row_width,
++ unsigned int *meta_row_height,
++ unsigned int *vm_group_bytes,
++ long *dpte_group_bytes,
++ unsigned int *PixelPTEReqWidth,
++ unsigned int *PixelPTEReqHeight,
++ unsigned int *PTERequestSize,
++ unsigned int *DPDE0BytesFrame,
++ unsigned int *MetaPTEBytesFrame);
++
++static double CalculateTWait(
++ unsigned int PrefetchMode,
++ double DRAMClockChangeLatency,
++ double UrgentLatency,
++ double SREnterPlusExitTime);
++static double CalculateRemoteSurfaceFlipDelay(
++ struct display_mode_lib *mode_lib,
++ double VRatio,
++ double SwathWidth,
++ double Bpp,
++ double LineTime,
++ double XFCTSlvVupdateOffset,
++ double XFCTSlvVupdateWidth,
++ double XFCTSlvVreadyOffset,
++ double XFCXBUFLatencyTolerance,
++ double XFCFillBWOverhead,
++ double XFCSlvChunkSize,
++ double XFCBusTransportTime,
++ double TCalc,
++ double TWait,
++ double *SrcActiveDrainRate,
++ double *TInitXFill,
++ double *TslvChk);
++static void CalculateActiveRowBandwidth(
++ bool GPUVMEnable,
++ enum source_format_class SourcePixelFormat,
++ double VRatio,
++ bool DCCEnable,
++ double LineTime,
++ unsigned int MetaRowByteLuma,
++ unsigned int MetaRowByteChroma,
++ unsigned int meta_row_height_luma,
++ unsigned int meta_row_height_chroma,
++ unsigned int PixelPTEBytesPerRowLuma,
++ unsigned int PixelPTEBytesPerRowChroma,
++ unsigned int dpte_row_height_luma,
++ unsigned int dpte_row_height_chroma,
++ double *meta_row_bw,
++ double *dpte_row_bw);
++static void CalculateFlipSchedule(
++ struct display_mode_lib *mode_lib,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ double UrgentExtraLatency,
++ double UrgentLatency,
++ unsigned int GPUVMMaxPageTableLevels,
++ bool HostVMEnable,
++ unsigned int HostVMMaxPageTableLevels,
++ unsigned int HostVMCachedPageTableLevels,
++ bool GPUVMEnable,
++ double PDEAndMetaPTEBytesPerFrame,
++ double MetaRowBytes,
++ double DPTEBytesPerRow,
++ double BandwidthAvailableForImmediateFlip,
++ unsigned int TotImmediateFlipBytes,
++ enum source_format_class SourcePixelFormat,
++ double LineTime,
++ double VRatio,
++ double Tno_bw,
++ bool DCCEnable,
++ unsigned int dpte_row_height,
++ unsigned int meta_row_height,
++ unsigned int dpte_row_height_chroma,
++ unsigned int meta_row_height_chroma,
++ double *DestinationLinesToRequestVMInImmediateFlip,
++ double *DestinationLinesToRequestRowInImmediateFlip,
++ double *final_flip_bw,
++ bool *ImmediateFlipSupportedForPipe);
++static double CalculateWriteBackDelay(
++ enum source_format_class WritebackPixelFormat,
++ double WritebackHRatio,
++ double WritebackVRatio,
++ unsigned int WritebackLumaHTaps,
++ unsigned int WritebackLumaVTaps,
++ unsigned int WritebackChromaHTaps,
++ unsigned int WritebackChromaVTaps,
++ unsigned int WritebackDestinationWidth);
++static void CalculateWatermarksAndDRAMSpeedChangeSupport(
++ struct display_mode_lib *mode_lib,
++ unsigned int PrefetchMode,
++ unsigned int NumberOfActivePlanes,
++ unsigned int MaxLineBufferLines,
++ unsigned int LineBufferSize,
++ unsigned int DPPOutputBufferPixels,
++ double DETBufferSizeInKByte,
++ unsigned int WritebackInterfaceLumaBufferSize,
++ unsigned int WritebackInterfaceChromaBufferSize,
++ double DCFCLK,
++ double UrgentOutOfOrderReturn,
++ double ReturnBW,
++ bool GPUVMEnable,
++ long dpte_group_bytes[],
++ unsigned int MetaChunkSize,
++ double UrgentLatency,
++ double ExtraLatency,
++ double WritebackLatency,
++ double WritebackChunkSize,
++ double SOCCLK,
++ double DRAMClockChangeLatency,
++ double SRExitTime,
++ double SREnterPlusExitTime,
++ double DCFCLKDeepSleep,
++ int DPPPerPlane[],
++ bool DCCEnable[],
++ double DPPCLK[],
++ unsigned int SwathWidthSingleDPPY[],
++ unsigned int SwathHeightY[],
++ double ReadBandwidthPlaneLuma[],
++ unsigned int SwathHeightC[],
++ double ReadBandwidthPlaneChroma[],
++ unsigned int LBBitPerPixel[],
++ unsigned int SwathWidthY[],
++ double HRatio[],
++ unsigned int vtaps[],
++ unsigned int VTAPsChroma[],
++ double VRatio[],
++ unsigned int HTotal[],
++ double PixelClock[],
++ unsigned int BlendingAndTiming[],
++ double BytePerPixelDETY[],
++ double BytePerPixelDETC[],
++ bool WritebackEnable[],
++ enum source_format_class WritebackPixelFormat[],
++ double WritebackDestinationWidth[],
++ double WritebackDestinationHeight[],
++ double WritebackSourceHeight[],
++ enum clock_change_support *DRAMClockChangeSupport,
++ double *UrgentWatermark,
++ double *WritebackUrgentWatermark,
++ double *DRAMClockChangeWatermark,
++ double *WritebackDRAMClockChangeWatermark,
++ double *StutterExitWatermark,
++ double *StutterEnterPlusExitWatermark,
++ double *MinActiveDRAMClockChangeLatencySupported);
++static void CalculateDCFCLKDeepSleep(
++ struct display_mode_lib *mode_lib,
++ unsigned int NumberOfActivePlanes,
++ double BytePerPixelDETY[],
++ double BytePerPixelDETC[],
++ double VRatio[],
++ unsigned int SwathWidthY[],
++ int DPPPerPlane[],
++ double HRatio[],
++ double PixelClock[],
++ double PSCL_THROUGHPUT[],
++ double PSCL_THROUGHPUT_CHROMA[],
++ double DPPCLK[],
++ double *DCFCLKDeepSleep);
++static void CalculateDETBufferSize(
++ double DETBufferSizeInKByte,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ double *DETBufferSizeY,
++ double *DETBufferSizeC);
++static void CalculateUrgentBurstFactor(
++ unsigned int DETBufferSizeInKByte,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ unsigned int SwathWidthY,
++ double LineTime,
++ double UrgentLatency,
++ double CursorBufferSize,
++ unsigned int CursorWidth,
++ unsigned int CursorBPP,
++ double VRatio,
++ double VRatioPreY,
++ double VRatioPreC,
++ double BytePerPixelInDETY,
++ double BytePerPixelInDETC,
++ double *UrgentBurstFactorCursor,
++ double *UrgentBurstFactorCursorPre,
++ double *UrgentBurstFactorLuma,
++ double *UrgentBurstFactorLumaPre,
++ double *UrgentBurstFactorChroma,
++ double *UrgentBurstFactorChromaPre,
++ unsigned int *NotEnoughUrgentLatencyHiding,
++ unsigned int *NotEnoughUrgentLatencyHidingPre);
++
++static void CalculatePixelDeliveryTimes(
++ unsigned int NumberOfActivePlanes,
++ double VRatio[],
++ double VRatioPrefetchY[],
++ double VRatioPrefetchC[],
++ unsigned int swath_width_luma_ub[],
++ unsigned int swath_width_chroma_ub[],
++ int DPPPerPlane[],
++ double HRatio[],
++ double PixelClock[],
++ double PSCL_THROUGHPUT[],
++ double PSCL_THROUGHPUT_CHROMA[],
++ double DPPCLK[],
++ double BytePerPixelDETC[],
++ enum scan_direction_class SourceScan[],
++ unsigned int BlockWidth256BytesY[],
++ unsigned int BlockHeight256BytesY[],
++ unsigned int BlockWidth256BytesC[],
++ unsigned int BlockHeight256BytesC[],
++ double DisplayPipeLineDeliveryTimeLuma[],
++ double DisplayPipeLineDeliveryTimeChroma[],
++ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
++ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
++ double DisplayPipeRequestDeliveryTimeLuma[],
++ double DisplayPipeRequestDeliveryTimeChroma[],
++ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
++ double DisplayPipeRequestDeliveryTimeChromaPrefetch[]);
++
++static void CalculateMetaAndPTETimes(
++ unsigned int NumberOfActivePlanes,
++ bool GPUVMEnable,
++ unsigned int MetaChunkSize,
++ unsigned int MinMetaChunkSizeBytes,
++ unsigned int GPUVMMaxPageTableLevels,
++ unsigned int HTotal[],
++ double VRatio[],
++ double VRatioPrefetchY[],
++ double VRatioPrefetchC[],
++ double DestinationLinesToRequestRowInVBlank[],
++ double DestinationLinesToRequestRowInImmediateFlip[],
++ double DestinationLinesToRequestVMInVBlank[],
++ double DestinationLinesToRequestVMInImmediateFlip[],
++ bool DCCEnable[],
++ double PixelClock[],
++ double BytePerPixelDETY[],
++ double BytePerPixelDETC[],
++ enum scan_direction_class SourceScan[],
++ unsigned int dpte_row_height[],
++ unsigned int dpte_row_height_chroma[],
++ unsigned int meta_row_width[],
++ unsigned int meta_row_height[],
++ unsigned int meta_req_width[],
++ unsigned int meta_req_height[],
++ long dpte_group_bytes[],
++ unsigned int PTERequestSizeY[],
++ unsigned int PTERequestSizeC[],
++ unsigned int PixelPTEReqWidthY[],
++ unsigned int PixelPTEReqHeightY[],
++ unsigned int PixelPTEReqWidthC[],
++ unsigned int PixelPTEReqHeightC[],
++ unsigned int dpte_row_width_luma_ub[],
++ unsigned int dpte_row_width_chroma_ub[],
++ unsigned int vm_group_bytes[],
++ unsigned int dpde0_bytes_per_frame_ub_l[],
++ unsigned int dpde0_bytes_per_frame_ub_c[],
++ unsigned int meta_pte_bytes_per_frame_ub_l[],
++ unsigned int meta_pte_bytes_per_frame_ub_c[],
++ double DST_Y_PER_PTE_ROW_NOM_L[],
++ double DST_Y_PER_PTE_ROW_NOM_C[],
++ double DST_Y_PER_META_ROW_NOM_L[],
++ double TimePerMetaChunkNominal[],
++ double TimePerMetaChunkVBlank[],
++ double TimePerMetaChunkFlip[],
++ double time_per_pte_group_nom_luma[],
++ double time_per_pte_group_vblank_luma[],
++ double time_per_pte_group_flip_luma[],
++ double time_per_pte_group_nom_chroma[],
++ double time_per_pte_group_vblank_chroma[],
++ double time_per_pte_group_flip_chroma[],
++ double TimePerVMGroupVBlank[],
++ double TimePerVMGroupFlip[],
++ double TimePerVMRequestVBlank[],
++ double TimePerVMRequestFlip[]);
++
++static double CalculateExtraLatency(
++ double UrgentRoundTripAndOutOfOrderLatency,
++ int TotalNumberOfActiveDPP,
++ int PixelChunkSizeInKByte,
++ int TotalNumberOfDCCActiveDPP,
++ int MetaChunkSize,
++ double ReturnBW,
++ bool GPUVMEnable,
++ bool HostVMEnable,
++ int NumberOfActivePlanes,
++ int NumberOfDPP[],
++ long dpte_group_bytes[],
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ int HostVMMaxPageTableLevels,
++ int HostVMCachedPageTableLevels);
++
++void dml21_recalculate(struct display_mode_lib *mode_lib)
++{ // runs the four calculation stages below on mode_lib, in this fixed order
++ ModeSupportAndSystemConfiguration(mode_lib); // no prototype visible in this file; presumably from an included header - verify
++ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); // likewise not prototyped in this file
++ DisplayPipeConfiguration(mode_lib); // file-local (static prototype above)
++ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); // file-local (static prototype above)
++}
++
++static unsigned int dscceComputeDelay(
++ unsigned int bpc,
++ double bpp,
++ unsigned int sliceWidth,
++ unsigned int numSlices,
++ enum output_format_class pixelFormat)
++{ // computes the dscce delay and returns it expressed in pixels
++ // valid bpc = source bits per component in the set of {8, 10, 12}
++ // valid bpp = increments of 1/16 of a bit
++ // min = 6/7/8 in N420/N422/444, respectively
++ // max = such that compression is 1:1
++ // valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
++ // valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
++ // valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
++
++ // fixed value; only used to derive the initial transmit delay below
++ unsigned int rcModelSize = 8192;
++
++ // N422/N420 operate at 2 pixels per clock
++ unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l,
++ Delay, pixels;
++
++ if (pixelFormat == dm_n422 || pixelFormat == dm_420)
++ pixelsPerClock = 2;
++ // all other modes operate at 1 pixel per clock
++ else
++ pixelsPerClock = 1;
++
++ // initial transmit delay as per PPS: half the model size divided by bpp and pixels per clock, rounded
++ initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock);
++
++ // compute ssm delay: 81 for 8 bpc, 89 for 10 bpc, 113 for anything else
++ if (bpc == 8)
++ D = 81;
++ else if (bpc == 10)
++ D = 89;
++ else
++ D = 113;
++
++ // divide by pixel per cycle to compute slice width as seen by DSC
++ w = sliceWidth / pixelsPerClock; // NOTE(review): integer division; w == 0 would divide by zero below - confirm callers never pass sliceWidth < pixelsPerClock
++
++ // S422 mode has an additional cycle of delay
++ if (pixelFormat == dm_s422)
++ S = 1;
++ else
++ S = 0;
++
++ // main calculation for the dscce (all unsigned integer arithmetic)
++ ix = initalXmitDelay + 45;
++ wx = (w + 2) / 3; // ceil(w / 3)
++ p = 3 * wx - w; // amount by which 3 * wx exceeds w (0, 1 or 2)
++ l0 = ix / w;
++ a = ix + p * l0;
++ ax = (a + 2) / 3 + D + 6 + 1; // ceil(a / 3) plus the ssm delay and fixed offsets
++ l = (ax + wx - 1) / wx; // ceil(ax / wx)
++ if ((ix % w) == 0 && p != 0)
++ lstall = 1;
++ else
++ lstall = 0;
++ Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22;
++
++ // dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
++ pixels = Delay * 3 * pixelsPerClock;
++ return pixels;
++}
++
++static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
++{
++ unsigned int Delay = 0; // running sum of the fixed per-stage delays for the selected format
++
++ if (pixelFormat == dm_420) {
++ // sfr
++ Delay = Delay + 2;
++ // dsccif
++ Delay = Delay + 0;
++ // dscc - input deserializer
++ Delay = Delay + 3;
++ // dscc gets pixels every other cycle
++ Delay = Delay + 2;
++ // dscc - input cdc fifo
++ Delay = Delay + 12;
++ // dscc gets pixels every other cycle
++ Delay = Delay + 13;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output cdc fifo
++ Delay = Delay + 7;
++ // dscc gets pixels every other cycle
++ Delay = Delay + 3;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output serializer
++ Delay = Delay + 1;
++ // sft
++ Delay = Delay + 1;
++ } else if (pixelFormat == dm_n422) {
++ // sfr
++ Delay = Delay + 2;
++ // dsccif
++ Delay = Delay + 1;
++ // dscc - input deserializer
++ Delay = Delay + 5;
++ // dscc - input cdc fifo
++ Delay = Delay + 25;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output cdc fifo
++ Delay = Delay + 10;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output serializer
++ Delay = Delay + 1;
++ // sft
++ Delay = Delay + 1;
++ } else {
++ // sfr
++ Delay = Delay + 2;
++ // dsccif
++ Delay = Delay + 0;
++ // dscc - input deserializer
++ Delay = Delay + 3;
++ // dscc - input cdc fifo
++ Delay = Delay + 12;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output cdc fifo
++ Delay = Delay + 7;
++ // dscc - output serializer
++ Delay = Delay + 1;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // sft
++ Delay = Delay + 1;
++ }
++
++ return Delay; // totals: 48 for dm_420, 49 for dm_n422, 30 for every other format
++}
++
++static bool CalculatePrefetchSchedule(
++ struct display_mode_lib *mode_lib,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ Pipe *myPipe,
++ unsigned int DSCDelay,
++ double DPPCLKDelaySubtotal,
++ double DPPCLKDelaySCL,
++ double DPPCLKDelaySCLLBOnly,
++ double DPPCLKDelayCNVCFormater,
++ double DPPCLKDelayCNVCCursor,
++ double DISPCLKDelaySubtotal,
++ unsigned int ScalerRecoutWidth,
++ enum output_format_class OutputFormat,
++ unsigned int MaxInterDCNTileRepeaters,
++ unsigned int VStartup,
++ unsigned int MaxVStartup,
++ unsigned int GPUVMPageTableLevels,
++ bool GPUVMEnable,
++ HostVM *myHostVM,
++ bool DynamicMetadataEnable,
++ int DynamicMetadataLinesBeforeActiveRequired,
++ unsigned int DynamicMetadataTransmittedBytes,
++ bool DCCEnable,
++ double UrgentLatency,
++ double UrgentExtraLatency,
++ double TCalc,
++ unsigned int PDEAndMetaPTEBytesFrame,
++ unsigned int MetaRowByte,
++ unsigned int PixelPTEBytesPerRow,
++ double PrefetchSourceLinesY,
++ unsigned int SwathWidthY,
++ double BytePerPixelDETY,
++ double VInitPreFillY,
++ unsigned int MaxNumSwathY,
++ double PrefetchSourceLinesC,
++ double BytePerPixelDETC,
++ double VInitPreFillC,
++ unsigned int MaxNumSwathC,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ double TWait,
++ bool XFCEnabled,
++ double XFCRemoteSurfaceFlipDelay,
++ bool ProgressiveToInterlaceUnitInOPP,
++ double *DSTXAfterScaler,
++ double *DSTYAfterScaler,
++ double *DestinationLinesForPrefetch,
++ double *PrefetchBandwidth,
++ double *DestinationLinesToRequestVMInVBlank,
++ double *DestinationLinesToRequestRowInVBlank,
++ double *VRatioPrefetchY,
++ double *VRatioPrefetchC,
++ double *RequiredPrefetchPixDataBWLuma,
++ double *RequiredPrefetchPixDataBWChroma,
++ unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
++ double *Tno_bw,
++ double *prefetch_vmrow_bw,
++ unsigned int *swath_width_luma_ub,
++ unsigned int *swath_width_chroma_ub,
++ unsigned int *VUpdateOffsetPix,
++ double *VUpdateWidthPix,
++ double *VReadyOffsetPix)
++{
++ bool MyError = false;
++ unsigned int DPPCycles, DISPCLKCycles;
++ double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime;
++ double Tdm, LineTime, Tsetup;
++ double dst_y_prefetch_equ;
++ double Tsw_oto;
++ double prefetch_bw_oto;
++ double Tvm_oto;
++ double Tr0_oto;
++ double Tvm_oto_lines;
++ double Tr0_oto_lines;
++ double Tsw_oto_lines;
++ double dst_y_prefetch_oto;
++ double TimeForFetchingMetaPTE = 0;
++ double TimeForFetchingRowInVBlank = 0;
++ double LinesToRequestPrefetchPixelData = 0;
++ double HostVMInefficiencyFactor;
++ unsigned int HostVMDynamicLevels;
++
++ if (GPUVMEnable == true && myHostVM->Enable == true) {
++ HostVMInefficiencyFactor =
++ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
++ / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
++ HostVMDynamicLevels = myHostVM->MaxPageTableLevels
++ - myHostVM->CachedPageTableLevels;
++ } else {
++ HostVMInefficiencyFactor = 1;
++ HostVMDynamicLevels = 0;
++ }
++
++ if (myPipe->ScalerEnabled)
++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
++ else
++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
++
++ DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
++
++ DISPCLKCycles = DISPCLKDelaySubtotal;
++
++ if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
++ return true;
++
++ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK
++ + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
++
++ if (myPipe->DPPPerPlane > 1)
++ *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
++
++ if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
++ *DSTYAfterScaler = 1;
++ else
++ *DSTYAfterScaler = 0;
++
++ DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler;
++ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
++ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
++
++ *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1);
++ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK);
++ *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime)
++ * myPipe->PixelClock;
++
++ *VReadyOffsetPix = dml_max(
++ 150.0 / myPipe->DPPCLK,
++ TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK)
++ * myPipe->PixelClock;
++
++ Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock;
++
++ LineTime = (double) myPipe->HTotal / myPipe->PixelClock;
++
++ if (DynamicMetadataEnable) {
++ double Tdmbf, Tdmec, Tdmsks;
++
++ Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
++ Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK;
++ Tdmec = LineTime;
++ if (DynamicMetadataLinesBeforeActiveRequired == -1)
++ Tdmsks = myPipe->VBlank * LineTime / 2.0;
++ else
++ Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
++ if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
++ Tdmsks = Tdmsks / 2;
++ if (VStartup * LineTime
++ < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
++ MyError = true;
++ *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait
++ + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime;
++ } else
++ *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0;
++ } else
++ Tdm = 0;
++
++ if (GPUVMEnable) {
++ if (GPUVMPageTableLevels >= 3)
++ *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1);
++ else
++ *Tno_bw = 0;
++ } else if (!DCCEnable)
++ *Tno_bw = LineTime;
++ else
++ *Tno_bw = LineTime / 4;
++
++ dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
++ - (Tsetup + Tdm) / LineTime
++ - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
++
++ Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
++
++ if (myPipe->SourceScan == dm_horz) {
++ *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
++ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
++ } else {
++ *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
++ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
++ }
++
++ prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
++
++
++ if (GPUVMEnable == true) {
++ Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
++ dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1),
++ LineTime / 4.0));
++ } else
++ Tvm_oto = LineTime / 4.0;
++
++ if ((GPUVMEnable == true || DCCEnable == true)) {
++ Tr0_oto = dml_max(
++ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
++ dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4)));
++ } else
++ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
++
++ Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0;
++ Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0;
++ Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0;
++ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75;
++
++ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
++
++ if (dst_y_prefetch_oto < dst_y_prefetch_equ)
++ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
++ else
++ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
++
++ dml_print("DML: VStartup: %d\n", VStartup);
++ dml_print("DML: TCalc: %f\n", TCalc);
++ dml_print("DML: TWait: %f\n", TWait);
++ dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
++ dml_print("DML: LineTime: %f\n", LineTime);
++ dml_print("DML: Tsetup: %f\n", Tsetup);
++ dml_print("DML: Tdm: %f\n", Tdm);
++ dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler);
++ dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler);
++ dml_print("DML: HTotal: %d\n", myPipe->HTotal);
++
++ *PrefetchBandwidth = 0;
++ *DestinationLinesToRequestVMInVBlank = 0;
++ *DestinationLinesToRequestRowInVBlank = 0;
++ *VRatioPrefetchY = 0;
++ *VRatioPrefetchC = 0;
++ *RequiredPrefetchPixDataBWLuma = 0;
++ if (*DestinationLinesForPrefetch > 1) {
++ double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
++ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
++ + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1)
++ + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2))
++ / (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
++
++ double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
++ HostVMInefficiencyFactor + PrefetchSourceLinesY *
++ *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
++ PrefetchSourceLinesC * *swath_width_chroma_ub *
++ dml_ceil(BytePerPixelDETC, 2)) /
++ (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 *
++ UrgentLatency * (1 + HostVMDynamicLevels));
++
++ double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow
++ * HostVMInefficiencyFactor + PrefetchSourceLinesY *
++ *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
++ PrefetchSourceLinesC * *swath_width_chroma_ub *
++ dml_ceil(BytePerPixelDETC, 2)) /
++ (*DestinationLinesForPrefetch * LineTime -
++ UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
++ * (HostVMDynamicLevels + 1) - 1));
++
++ double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub *
++ dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC *
++ *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) /
++ (*DestinationLinesForPrefetch * LineTime -
++ UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
++ * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency *
++ (1 + HostVMDynamicLevels));
++
++ if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) {
++ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw);
++ }
++ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
++ *PrefetchBandwidth = PrefetchBandwidth1;
++ } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) {
++ *PrefetchBandwidth = PrefetchBandwidth2;
++ } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
++ *PrefetchBandwidth = PrefetchBandwidth3;
++ } else {
++ *PrefetchBandwidth = PrefetchBandwidth4;
++ }
++
++ if (GPUVMEnable) {
++ TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth,
++ dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4));
++ } else {
++// 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor
++// so if this needs to be reinstated, then it should be officially done in the VBA code as well.
++// if (mode_lib->NumberOfCursors > 0 || XFCEnabled)
++ TimeForFetchingMetaPTE = LineTime / 4;
++// else
++// TimeForFetchingMetaPTE = 0.0;
++ }
++
++ if ((GPUVMEnable == true || DCCEnable == true)) {
++ TimeForFetchingRowInVBlank =
++ dml_max(
++ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
++ / *PrefetchBandwidth,
++ dml_max(
++ UrgentLatency * (1 + HostVMDynamicLevels),
++ dml_max(
++ (LineTime
++ - TimeForFetchingMetaPTE) / 2.0,
++ LineTime
++ / 4.0)));
++ } else {
++// See note above dated 5/30/2018
++// if (NumberOfCursors > 0 || XFCEnabled)
++ TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0;
++// else // TODO: Did someone else add this??
++// TimeForFetchingRowInVBlank = 0.0;
++ }
++
++ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
++
++ *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
++
++ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
++// See note above dated 5/30/2018
++// - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
++ - ((GPUVMEnable || DCCEnable) ?
++ (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) :
++ 0.0); // TODO: Did someone else add this??
++
++ if (LinesToRequestPrefetchPixelData > 0) {
++
++ *VRatioPrefetchY = (double) PrefetchSourceLinesY
++ / LinesToRequestPrefetchPixelData;
++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
++ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
++ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
++ *VRatioPrefetchY =
++ dml_max(
++ (double) PrefetchSourceLinesY
++ / LinesToRequestPrefetchPixelData,
++ (double) MaxNumSwathY
++ * SwathHeightY
++ / (LinesToRequestPrefetchPixelData
++ - (VInitPreFillY
++ - 3.0)
++ / 2.0));
++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
++ } else {
++ MyError = true;
++ *VRatioPrefetchY = 0;
++ }
++ }
++
++ *VRatioPrefetchC = (double) PrefetchSourceLinesC
++ / LinesToRequestPrefetchPixelData;
++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
++
++ if ((SwathHeightC > 4)) {
++ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
++ *VRatioPrefetchC =
++ dml_max(
++ *VRatioPrefetchC,
++ (double) MaxNumSwathC
++ * SwathHeightC
++ / (LinesToRequestPrefetchPixelData
++ - (VInitPreFillC
++ - 3.0)
++ / 2.0));
++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
++ } else {
++ MyError = true;
++ *VRatioPrefetchC = 0;
++ }
++ }
++
++ *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane
++ * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData
++ * dml_ceil(BytePerPixelDETY, 1)
++ * *swath_width_luma_ub / LineTime;
++ *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane
++ * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData
++ * dml_ceil(BytePerPixelDETC, 2)
++ * *swath_width_chroma_ub / LineTime;
++ } else {
++ MyError = true;
++ *VRatioPrefetchY = 0;
++ *VRatioPrefetchC = 0;
++ *RequiredPrefetchPixDataBWLuma = 0;
++ *RequiredPrefetchPixDataBWChroma = 0;
++ }
++
++ dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE);
++ dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank);
++ dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank);
++ dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime);
++ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
++
++ } else {
++ MyError = true;
++ }
++
++ {
++ double prefetch_vm_bw;
++ double prefetch_row_bw;
++
++ if (PDEAndMetaPTEBytesFrame == 0) {
++ prefetch_vm_bw = 0;
++ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
++ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
++ } else {
++ prefetch_vm_bw = 0;
++ MyError = true;
++ }
++ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
++ prefetch_row_bw = 0;
++ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
++ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
++ } else {
++ prefetch_row_bw = 0;
++ MyError = true;
++ }
++
++ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
++ }
++
++ if (MyError) {
++ *PrefetchBandwidth = 0;
++ TimeForFetchingMetaPTE = 0;
++ TimeForFetchingRowInVBlank = 0;
++ *DestinationLinesToRequestVMInVBlank = 0;
++ *DestinationLinesToRequestRowInVBlank = 0;
++ *DestinationLinesForPrefetch = 0;
++ LinesToRequestPrefetchPixelData = 0;
++ *VRatioPrefetchY = 0;
++ *VRatioPrefetchC = 0;
++ *RequiredPrefetchPixDataBWLuma = 0;
++ *RequiredPrefetchPixDataBWChroma = 0;
++ }
++
++ return MyError;
++}
++
++ /*
++  * Returns (4 * VCOSpeed) / dml_floor(4 * VCOSpeed / Clock, 1).
++  *
++  * The divider is the ratio of four times the VCO speed to the requested
++  * clock, passed through dml_floor() with a granularity argument of 1.
++  * NOTE(review): dml_floor() is defined outside this chunk; presumably it
++  * rounds the ratio down, which would make the result >= Clock - confirm.
++  */
++ static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
++ {
++ 	const double ScaledVCO = VCOSpeed * 4;
++ 	const double Divider = dml_floor(ScaledVCO / Clock, 1);
++
++ 	return ScaledVCO / Divider;
++ }
++
++ /*
++  * Returns (4 * VCOSpeed) / dml_ceil(4 * VCOSpeed / Clock, 1).
++  *
++  * Same shape as the "Up" variant, but the divider goes through dml_ceil()
++  * instead of dml_floor().
++  * NOTE(review): dml_ceil() is defined outside this chunk; presumably it
++  * rounds the ratio up, which would make the result <= Clock - confirm.
++  */
++ static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
++ {
++ 	const double ScaledVCO = VCOSpeed * 4;
++ 	const double Divider = dml_ceil(ScaledVCO / Clock, 1);
++
++ 	return ScaledVCO / Divider;
++ }
++
++ /*
++  * Select a DCC request class for a surface and report the matching block
++  * limits.
++  *
++  * When DCCEnabled is false, or no rule below matches, the request class is
++  * REQ_NA: all three outputs are written as 0 and 0.0 is returned.
++  *
++  * When DCCProgrammingAssumesScanDirectionUnknown is true the class is chosen
++  * by comparing DETBufferSize against two viewport-derived sizes; otherwise it
++  * is chosen from BytePerPixel (1, 2, 4 or 8), ScanOrientation, SwathHeight
++  * and, for 8 bytes per pixel, TilingFormat.
++  *
++  * Outputs per class:
++  *   REQ_256Bytes              -> 256 / 256 / false, returns 4.0
++  *   REQ_128BytesContiguous    -> 128 / 128 / false, returns 2.0
++  *   REQ_128BytesNonContiguous -> 256 /  64 / true,  returns 4.0
++  *   REQ_NA                    ->   0 /   0 / 0,     returns 0.0
++  * (columns: *MaxUncompressedBlock / *MaxCompressedBlock /
++  *  *Independent64ByteBlock)
++  */
++ static double CalculateDCCConfiguration(
++ 		bool DCCEnabled,
++ 		bool DCCProgrammingAssumesScanDirectionUnknown,
++ 		unsigned int ViewportWidth,
++ 		unsigned int ViewportHeight,
++ 		double DETBufferSize,
++ 		unsigned int RequestHeight256Byte,
++ 		unsigned int SwathHeight,
++ 		enum dm_swizzle_mode TilingFormat,
++ 		unsigned int BytePerPixel,
++ 		enum scan_direction_class ScanOrientation,
++ 		unsigned int *MaxUncompressedBlock,
++ 		unsigned int *MaxCompressedBlock,
++ 		unsigned int *Independent64ByteBlock)
++ {
++ 	enum {
++ 		REQ_256Bytes,
++ 		REQ_128BytesNonContiguous,
++ 		REQ_128BytesContiguous,
++ 		REQ_NA
++ 	} Request = REQ_NA;
++ 	double MaximumDCCCompressionSurface;
++ 	const bool IsVertical = (ScanOrientation == dm_vert);
++ 	/* Membership in the swizzle-mode set both 8-byte-per-pixel rules test. */
++ 	const bool IsDOrRxSwizzle = TilingFormat == dm_sw_4kb_d
++ 			|| TilingFormat == dm_sw_4kb_d_x
++ 			|| TilingFormat == dm_sw_var_d
++ 			|| TilingFormat == dm_sw_var_d_x
++ 			|| TilingFormat == dm_sw_64kb_d
++ 			|| TilingFormat == dm_sw_64kb_d_x
++ 			|| TilingFormat == dm_sw_64kb_d_t
++ 			|| TilingFormat == dm_sw_64kb_r_x;
++
++ 	if (DCCEnabled && DCCProgrammingAssumesScanDirectionUnknown) {
++ 		const unsigned int WidthBytes = RequestHeight256Byte * ViewportWidth * BytePerPixel;
++
++ 		/*
++ 		 * The 256 / RequestHeight256Byte term stays inside the
++ 		 * short-circuit expressions so it is only evaluated on the
++ 		 * paths that need it.
++ 		 */
++ 		if (DETBufferSize >= WidthBytes
++ 				&& DETBufferSize >= 256 / RequestHeight256Byte * ViewportHeight)
++ 			Request = REQ_256Bytes;
++ 		else if ((DETBufferSize < WidthBytes
++ 					&& (BytePerPixel == 2 || BytePerPixel == 4))
++ 				|| (DETBufferSize < 256 / RequestHeight256Byte * ViewportHeight
++ 					&& BytePerPixel == 8
++ 					&& IsDOrRxSwizzle))
++ 			Request = REQ_128BytesNonContiguous;
++ 		else
++ 			Request = REQ_128BytesContiguous;
++ 	} else if (DCCEnabled) {
++ 		switch (BytePerPixel) {
++ 		case 1:
++ 			Request = (IsVertical || SwathHeight == 16) ?
++ 					REQ_256Bytes : REQ_128BytesContiguous;
++ 			break;
++ 		case 2:
++ 			if ((IsVertical && SwathHeight == 16) || (!IsVertical && SwathHeight == 8))
++ 				Request = REQ_256Bytes;
++ 			else if (IsVertical)
++ 				Request = REQ_128BytesContiguous;
++ 			else
++ 				Request = REQ_128BytesNonContiguous;
++ 			break;
++ 		case 4:
++ 			if (SwathHeight == 8)
++ 				Request = REQ_256Bytes;
++ 			else if (IsVertical)
++ 				Request = REQ_128BytesContiguous;
++ 			else
++ 				Request = REQ_128BytesNonContiguous;
++ 			break;
++ 		case 8:
++ 			if (IsDOrRxSwizzle) {
++ 				if ((IsVertical && SwathHeight == 8)
++ 						|| (!IsVertical && SwathHeight == 4))
++ 					Request = REQ_256Bytes;
++ 				else if (!IsVertical)
++ 					Request = REQ_128BytesContiguous;
++ 				else
++ 					Request = REQ_128BytesNonContiguous;
++ 			} else {
++ 				Request = (!IsVertical || SwathHeight == 8) ?
++ 						REQ_256Bytes : REQ_128BytesContiguous;
++ 			}
++ 			break;
++ 		default:
++ 			/* Any other pixel size leaves the request as REQ_NA. */
++ 			break;
++ 		}
++ 	}
++
++ 	switch (Request) {
++ 	case REQ_256Bytes:
++ 		*MaxUncompressedBlock = 256;
++ 		*MaxCompressedBlock = 256;
++ 		*Independent64ByteBlock = false;
++ 		MaximumDCCCompressionSurface = 4.0;
++ 		break;
++ 	case REQ_128BytesContiguous:
++ 		*MaxUncompressedBlock = 128;
++ 		*MaxCompressedBlock = 128;
++ 		*Independent64ByteBlock = false;
++ 		MaximumDCCCompressionSurface = 2.0;
++ 		break;
++ 	case REQ_128BytesNonContiguous:
++ 		*MaxUncompressedBlock = 256;
++ 		*MaxCompressedBlock = 64;
++ 		*Independent64ByteBlock = true;
++ 		MaximumDCCCompressionSurface = 4.0;
++ 		break;
++ 	default:
++ 		*MaxUncompressedBlock = 0;
++ 		*MaxCompressedBlock = 0;
++ 		*Independent64ByteBlock = 0;
++ 		MaximumDCCCompressionSurface = 0.0;
++ 		break;
++ 	}
++
++ 	return MaximumDCCCompressionSurface;
++ }
++
++ /*
++  * Compute the number of source lines to prefetch for one plane.
++  *
++  * Writes *VInitPreFill (derived from VRatio, vtaps and, unless
++  * ProgressiveToInterlaceUnitInOPP is set, the Interlace flag) and
++  * *MaxNumSwath, then returns
++  *   *MaxNumSwath * SwathHeight + MaxPartialSwath.
++  *
++  * Two variants are selected by mode_lib->vba.IgnoreViewportPositioning:
++  *  - set:   a warning is printed when ViewportYStart is non-zero, and the
++  *           partial swath is taken as-is;
++  *  - clear: one extra swath is counted and the partial swath is passed
++  *           through dml_max(1U, ...).
++  */
++ static double CalculatePrefetchSourceLines(
++ 		struct display_mode_lib *mode_lib,
++ 		double VRatio,
++ 		double vtaps,
++ 		bool Interlace,
++ 		bool ProgressiveToInterlaceUnitInOPP,
++ 		unsigned int SwathHeight,
++ 		unsigned int ViewportYStart,
++ 		double *VInitPreFill,
++ 		unsigned int *MaxNumSwath)
++ {
++ 	unsigned int MaxPartialSwath;
++ 	double PreFill;
++
++ 	*VInitPreFill = dml_floor(
++ 			ProgressiveToInterlaceUnitInOPP ?
++ 				(VRatio + vtaps + 1) / 2.0 :
++ 				(VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0,
++ 			1);
++ 	PreFill = *VInitPreFill;
++
++ 	if (mode_lib->vba.IgnoreViewportPositioning) {
++ 		if (ViewportYStart != 0)
++ 			dml_print(
++ 					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
++
++ 		*MaxNumSwath = dml_ceil(PreFill / SwathHeight, 1);
++
++ 		MaxPartialSwath = (PreFill > 1.0) ?
++ 				(unsigned int) (PreFill - 1) % SwathHeight :
++ 				(unsigned int) (PreFill + SwathHeight - 1) % SwathHeight;
++ 	} else {
++ 		*MaxNumSwath = dml_ceil((PreFill - 1.0) / SwathHeight, 1) + 1.0;
++
++ 		MaxPartialSwath = (PreFill > 1.0) ?
++ 				(unsigned int) (PreFill - 2) % SwathHeight :
++ 				(unsigned int) (PreFill + SwathHeight - 2) % SwathHeight;
++ 		MaxPartialSwath = dml_max(1U, MaxPartialSwath);
++ 	}
++
++ 	return *MaxNumSwath * SwathHeight + MaxPartialSwath;
++ }
++
++ /*
++  * Compute per-plane page-table and DCC-metadata byte counts.
++  *
++  * Returns PDEAndMetaPTEBytesFrame: the sum of *MetaPTEBytesFrame, the meta
++  * PDE bytes, *DPDE0BytesFrame and the extra DPDE bytes, scaled by
++  * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)) when
++  * HostVMEnable is set.
++  *
++  * Outputs always written: *MetaPTEBytesFrame, *MetaRowByte, *MacroTileWidth,
++  * *DPDE0BytesFrame, *PixelPTEBytesPerRow, *PTEBufferSizeNotExceeded,
++  * *vm_group_bytes, *dpte_group_bytes.
++  * Outputs written only when DCCEnable is true: *MetaRequestHeight,
++  * *MetaRequestWidth, *meta_row_height, *meta_row_width.
++  * Outputs written only when GPUVMEnable is true: *PixelPTEReqHeight,
++  * *PixelPTEReqWidth, *PTERequestSize, *dpte_row_height, *dpte_row_width_ub.
++  *
++  * SourcePixelFormat is accepted but not read by this function.
++  *
++  * Two arithmetic fixes relative to the earlier form of this function:
++  *  - FractionOfPTEReturnDrop for vertical scan with 4 KiB macro tiles was
++  *    written as the integer expression 7 / 8, which is 0 in C; it is now
++  *    7.0 / 8.0.
++  *  - DCCMetaSurfaceBytes - VMMPageSize was evaluated in unsigned arithmetic
++  *    and wrapped when the meta surface was smaller than one VM page; the
++  *    subtraction is now done in double.
++  */
++ static unsigned int CalculateVMAndRowBytes(
++ 		struct display_mode_lib *mode_lib,
++ 		bool DCCEnable,
++ 		unsigned int BlockHeight256Bytes,
++ 		unsigned int BlockWidth256Bytes,
++ 		enum source_format_class SourcePixelFormat,
++ 		unsigned int SurfaceTiling,
++ 		unsigned int BytePerPixel,
++ 		enum scan_direction_class ScanDirection,
++ 		unsigned int ViewportWidth,
++ 		unsigned int ViewportHeight,
++ 		unsigned int SwathWidth,
++ 		bool GPUVMEnable,
++ 		bool HostVMEnable,
++ 		unsigned int HostVMMaxPageTableLevels,
++ 		unsigned int HostVMCachedPageTableLevels,
++ 		unsigned int VMMPageSize,
++ 		unsigned int PTEBufferSizeInRequests,
++ 		unsigned int Pitch,
++ 		unsigned int DCCMetaPitch,
++ 		unsigned int *MacroTileWidth,
++ 		unsigned int *MetaRowByte,
++ 		unsigned int *PixelPTEBytesPerRow,
++ 		bool *PTEBufferSizeNotExceeded,
++ 		unsigned int *dpte_row_width_ub,
++ 		unsigned int *dpte_row_height,
++ 		unsigned int *MetaRequestWidth,
++ 		unsigned int *MetaRequestHeight,
++ 		unsigned int *meta_row_width,
++ 		unsigned int *meta_row_height,
++ 		unsigned int *vm_group_bytes,
++ 		long *dpte_group_bytes,
++ 		unsigned int *PixelPTEReqWidth,
++ 		unsigned int *PixelPTEReqHeight,
++ 		unsigned int *PTERequestSize,
++ 		unsigned int *DPDE0BytesFrame,
++ 		unsigned int *MetaPTEBytesFrame)
++ {
++ 	unsigned int MPDEBytesFrame;
++ 	unsigned int DCCMetaSurfaceBytes;
++ 	unsigned int MacroTileSizeBytes;
++ 	unsigned int MacroTileHeight;
++ 	unsigned int ExtraDPDEBytesFrame;
++ 	unsigned int PDEAndMetaPTEBytesFrame;
++ 	unsigned int PixelPTEReqHeightPTEs;
++
++ 	/* DCC metadata: request/row geometry and per-frame meta PTE bytes. */
++ 	if (DCCEnable == true) {
++ 		*MetaRequestHeight = 8 * BlockHeight256Bytes;
++ 		*MetaRequestWidth = 8 * BlockWidth256Bytes;
++ 		if (ScanDirection == dm_horz) {
++ 			*meta_row_height = *MetaRequestHeight;
++ 			*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
++ 					+ *MetaRequestWidth;
++ 			*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
++ 		} else {
++ 			/* Non-horizontal scan: request width and height swap roles. */
++ 			*meta_row_height = *MetaRequestWidth;
++ 			*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
++ 					+ *MetaRequestHeight;
++ 			*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
++ 		}
++ 		/*
++ 		 * NOTE(review): both branches use ViewportHeight and differ only
++ 		 * in the (double) cast; kept as found.
++ 		 */
++ 		if (ScanDirection == dm_horz) {
++ 			DCCMetaSurfaceBytes = DCCMetaPitch
++ 					* (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
++ 							+ 64 * BlockHeight256Bytes) * BytePerPixel
++ 					/ 256;
++ 		} else {
++ 			DCCMetaSurfaceBytes = DCCMetaPitch
++ 					* (dml_ceil(
++ 							(double) ViewportHeight - 1,
++ 							64 * BlockHeight256Bytes)
++ 							+ 64 * BlockHeight256Bytes) * BytePerPixel
++ 					/ 256;
++ 		}
++ 		if (GPUVMEnable == true) {
++ 			/*
++ 			 * Convert to double before subtracting so that a meta
++ 			 * surface smaller than VMMPageSize yields a negative
++ 			 * value instead of wrapping as unsigned.
++ 			 */
++ 			*MetaPTEBytesFrame = (dml_ceil(
++ 					((double) DCCMetaSurfaceBytes - VMMPageSize)
++ 							/ (8 * VMMPageSize),
++ 					1) + 1) * 64;
++ 			MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2);
++ 		} else {
++ 			*MetaPTEBytesFrame = 0;
++ 			MPDEBytesFrame = 0;
++ 		}
++ 	} else {
++ 		*MetaPTEBytesFrame = 0;
++ 		MPDEBytesFrame = 0;
++ 		*MetaRowByte = 0;
++ 	}
++
++ 	/* Macro-tile size and height by swizzle mode. */
++ 	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
++ 		MacroTileSizeBytes = 256;
++ 		MacroTileHeight = BlockHeight256Bytes;
++ 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
++ 			|| SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
++ 		MacroTileSizeBytes = 4096;
++ 		MacroTileHeight = 4 * BlockHeight256Bytes;
++ 	} else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
++ 			|| SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
++ 			|| SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
++ 			|| SurfaceTiling == dm_sw_64kb_r_x) {
++ 		MacroTileSizeBytes = 65536;
++ 		MacroTileHeight = 16 * BlockHeight256Bytes;
++ 	} else {
++ 		MacroTileSizeBytes = 262144;
++ 		MacroTileHeight = 32 * BlockHeight256Bytes;
++ 	}
++ 	*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
++
++ 	/* Data PDE bytes per frame; only when more than two table levels exist. */
++ 	if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) {
++ 		if (ScanDirection == dm_horz) {
++ 			*DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
++ 		} else {
++ 			*DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
++ 		}
++ 		ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3);
++ 	} else {
++ 		*DPDE0BytesFrame = 0;
++ 		ExtraDPDEBytesFrame = 0;
++ 	}
++
++ 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
++ 			+ ExtraDPDEBytesFrame;
++
++ 	if (HostVMEnable == true) {
++ 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
++ 	}
++
++ 	/* Pixel PTE request geometry and per-row PTE bytes. */
++ 	if (GPUVMEnable == true) {
++ 		double FractionOfPTEReturnDrop;
++
++ 		if (SurfaceTiling == dm_sw_linear) {
++ 			PixelPTEReqHeightPTEs = 1;
++ 			*PixelPTEReqHeight = 1;
++ 			*PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
++ 			*PTERequestSize = 64;
++ 			FractionOfPTEReturnDrop = 0;
++ 		} else if (MacroTileSizeBytes == 4096) {
++ 			PixelPTEReqHeightPTEs = 1;
++ 			*PixelPTEReqHeight = MacroTileHeight;
++ 			*PixelPTEReqWidth = 8 * *MacroTileWidth;
++ 			*PTERequestSize = 64;
++ 			if (ScanDirection == dm_horz)
++ 				FractionOfPTEReturnDrop = 0;
++ 			else
++ 				/* Floating-point literals: 7 / 8 would be integer 0. */
++ 				FractionOfPTEReturnDrop = 7.0 / 8.0;
++ 		} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
++ 			PixelPTEReqHeightPTEs = 16;
++ 			*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
++ 			*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
++ 			*PTERequestSize = 128;
++ 			FractionOfPTEReturnDrop = 0;
++ 		} else {
++ 			PixelPTEReqHeightPTEs = 1;
++ 			*PixelPTEReqHeight = MacroTileHeight;
++ 			*PixelPTEReqWidth = 8 * *MacroTileWidth;
++ 			*PTERequestSize = 64;
++ 			FractionOfPTEReturnDrop = 0;
++ 		}
++
++ 		if (SurfaceTiling == dm_sw_linear) {
++ 			*dpte_row_height = dml_min(128,
++ 					1 << (unsigned int) dml_floor(
++ 							dml_log2(
++ 									(double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch),
++ 							1));
++ 			*dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
++ 			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
++ 		} else if (ScanDirection == dm_horz) {
++ 			*dpte_row_height = *PixelPTEReqHeight;
++ 			*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
++ 			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
++ 		} else {
++ 			*dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
++ 			*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
++ 			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
++ 		}
++ 		/* Row PTE bytes, less the dropped fraction, must fit the PTE buffer. */
++ 		if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
++ 				<= 64 * PTEBufferSizeInRequests) {
++ 			*PTEBufferSizeNotExceeded = true;
++ 		} else {
++ 			*PTEBufferSizeNotExceeded = false;
++ 		}
++ 	} else {
++ 		*PixelPTEBytesPerRow = 0;
++ 		*PTEBufferSizeNotExceeded = true;
++ 	}
++ 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame);
++
++ 	if (HostVMEnable == true) {
++ 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
++ 	}
++
++ 	/* Group sizes: HostVM takes precedence over GPUVM. */
++ 	if (HostVMEnable == true) {
++ 		*vm_group_bytes = 512;
++ 		*dpte_group_bytes = 512;
++ 	} else if (GPUVMEnable == true) {
++ 		*vm_group_bytes = 2048;
++ 		if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) {
++ 			*dpte_group_bytes = 512;
++ 		} else {
++ 			*dpte_group_bytes = 2048;
++ 		}
++ 	} else {
++ 		*vm_group_bytes = 0;
++ 		*dpte_group_bytes = 0;
++ 	}
++
++ 	return PDEAndMetaPTEBytesFrame;
++ }
++
++static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
++ struct display_mode_lib *mode_lib)
++{
++ struct vba_vars_st *locals = &mode_lib->vba;
++ unsigned int j, k;
++
++ mode_lib->vba.WritebackDISPCLK = 0.0;
++ mode_lib->vba.DISPCLKWithRamping = 0;
++ mode_lib->vba.DISPCLKWithoutRamping = 0;
++ mode_lib->vba.GlobalDPPCLK = 0.0;
++
++ // DISPCLK and DPPCLK Calculation
++ //
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.WritebackEnable[k]) {
++ mode_lib->vba.WritebackDISPCLK =
++ dml_max(
++ mode_lib->vba.WritebackDISPCLK,
++ CalculateWriteBackDISPCLK(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k],
++ mode_lib->vba.HTotal[k],
++ mode_lib->vba.WritebackChromaLineBufferWidth));
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.HRatio[k] > 1) {
++ locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / dml_ceil(
++ mode_lib->vba.htaps[k]
++ / 6.0,
++ 1));
++ } else {
++ locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma =
++ mode_lib->vba.PixelClock[k]
++ * dml_max(
++ mode_lib->vba.vtaps[k] / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]),
++ dml_max(
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / locals->PSCL_THROUGHPUT_LUMA[k],
++ 1.0));
++
++ if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
++ && mode_lib->vba.DPPCLKUsingSingleDPPLuma
++ < 2 * mode_lib->vba.PixelClock[k]) {
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
++ }
++
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
++ locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
++ locals->DPPCLKUsingSingleDPP[k] =
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma;
++ } else {
++ if (mode_lib->vba.HRatio[k] > 1) {
++ locals->PSCL_THROUGHPUT_CHROMA[k] =
++ dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / 2
++ / dml_ceil(
++ mode_lib->vba.HTAPsChroma[k]
++ / 6.0,
++ 1.0));
++ } else {
++ locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++ mode_lib->vba.DPPCLKUsingSingleDPPChroma =
++ mode_lib->vba.PixelClock[k]
++ * dml_max(
++ mode_lib->vba.VTAPsChroma[k]
++ / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]
++ / 2),
++ dml_max(
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / 4
++ / locals->PSCL_THROUGHPUT_CHROMA[k],
++ 1.0));
++
++ if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
++ && mode_lib->vba.DPPCLKUsingSingleDPPChroma
++ < 2 * mode_lib->vba.PixelClock[k]) {
++ mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
++ * mode_lib->vba.PixelClock[k];
++ }
++
++ locals->DPPCLKUsingSingleDPP[k] = dml_max(
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma,
++ mode_lib->vba.DPPCLKUsingSingleDPPChroma);
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.BlendingAndTiming[k] != k)
++ continue;
++ if (mode_lib->vba.ODMCombineEnabled[k]) {
++ mode_lib->vba.DISPCLKWithRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.PixelClock[k] / 2
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100)
++ * (1
++ + mode_lib->vba.DISPCLKRampingMargin
++ / 100));
++ mode_lib->vba.DISPCLKWithoutRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.PixelClock[k] / 2
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100));
++ } else if (!mode_lib->vba.ODMCombineEnabled[k]) {
++ mode_lib->vba.DISPCLKWithRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.PixelClock[k]
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100)
++ * (1
++ + mode_lib->vba.DISPCLKRampingMargin
++ / 100));
++ mode_lib->vba.DISPCLKWithoutRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.PixelClock[k]
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100));
++ }
++ }
++
++ mode_lib->vba.DISPCLKWithRamping = dml_max(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.WritebackDISPCLK);
++ mode_lib->vba.DISPCLKWithoutRamping = dml_max(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.WritebackDISPCLK);
++
++ ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
++ mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
++ mode_lib->vba.DISPCLK_calculated =
++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
++ } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
++ mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
++ } else {
++ mode_lib->vba.DISPCLK_calculated =
++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
++ }
++ DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k]
++ / mode_lib->vba.DPPPerPlane[k]
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
++ mode_lib->vba.GlobalDPPCLK = dml_max(
++ mode_lib->vba.GlobalDPPCLK,
++ mode_lib->vba.DPPCLK_calculated[k]);
++ }
++ mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
++ mode_lib->vba.GlobalDPPCLK,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
++ * dml_ceil(
++ mode_lib->vba.DPPCLK_calculated[k] * 255
++ / mode_lib->vba.GlobalDPPCLK,
++ 1);
++ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
++ }
++
++ // Urgent and B P-State/DRAM Clock Change Watermark
++ DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
++ DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
++ DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ bool MainPlaneDoesODMCombine = false;
++
++ if (mode_lib->vba.SourceScan[k] == dm_horz)
++ locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
++ else
++ locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
++
++ if (mode_lib->vba.ODMCombineEnabled[k] == true)
++ MainPlaneDoesODMCombine = true;
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
++ if (mode_lib->vba.BlendingAndTiming[k] == j
++ && mode_lib->vba.ODMCombineEnabled[j] == true)
++ MainPlaneDoesODMCombine = true;
++
++ if (MainPlaneDoesODMCombine == true)
++ locals->SwathWidthY[k] = dml_min(
++ (double) locals->SwathWidthSingleDPPY[k],
++ dml_round(
++ mode_lib->vba.HActive[k] / 2.0
++ * mode_lib->vba.HRatio[k]));
++ else
++ locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k]
++ / mode_lib->vba.DPPPerPlane[k];
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
++ locals->BytePerPixelDETY[k] = 8;
++ locals->BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
++ locals->BytePerPixelDETY[k] = 4;
++ locals->BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
++ locals->BytePerPixelDETY[k] = 2;
++ locals->BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
++ locals->BytePerPixelDETY[k] = 1;
++ locals->BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
++ locals->BytePerPixelDETY[k] = 1;
++ locals->BytePerPixelDETC[k] = 2;
++ } else { // dm_420_10
++ locals->BytePerPixelDETY[k] = 4.0 / 3.0;
++ locals->BytePerPixelDETC[k] = 8.0 / 3.0;
++ }
++ }
++
++ mode_lib->vba.TotalDataReadBandwidth = 0.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k]
++ * dml_ceil(locals->BytePerPixelDETY[k], 1)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ * mode_lib->vba.VRatio[k];
++ locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k]
++ / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ * mode_lib->vba.VRatio[k] / 2;
++ DTRACE(
++ " read_bw[%i] = %fBps",
++ k,
++ locals->ReadBandwidthPlaneLuma[k]
++ + locals->ReadBandwidthPlaneChroma[k]);
++ mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k]
++ + locals->ReadBandwidthPlaneChroma[k];
++ }
++
++ // DCFCLK Deep Sleep
++ CalculateDCFCLKDeepSleep(
++ mode_lib,
++ mode_lib->vba.NumberOfActivePlanes,
++ locals->BytePerPixelDETY,
++ locals->BytePerPixelDETC,
++ mode_lib->vba.VRatio,
++ locals->SwathWidthY,
++ mode_lib->vba.DPPPerPlane,
++ mode_lib->vba.HRatio,
++ mode_lib->vba.PixelClock,
++ locals->PSCL_THROUGHPUT_LUMA,
++ locals->PSCL_THROUGHPUT_CHROMA,
++ locals->DPPCLK,
++ &mode_lib->vba.DCFCLKDeepSleep);
++
++ // DSCCLK
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
++ locals->DSCCLK_calculated[k] = 0.0;
++ } else {
++ if (mode_lib->vba.OutputFormat[k] == dm_420
++ || mode_lib->vba.OutputFormat[k] == dm_n422)
++ mode_lib->vba.DSCFormatFactor = 2;
++ else
++ mode_lib->vba.DSCFormatFactor = 1;
++ if (mode_lib->vba.ODMCombineEnabled[k])
++ locals->DSCCLK_calculated[k] =
++ mode_lib->vba.PixelClockBackEnd[k] / 6
++ / mode_lib->vba.DSCFormatFactor
++ / (1
++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100);
++ else
++ locals->DSCCLK_calculated[k] =
++ mode_lib->vba.PixelClockBackEnd[k] / 3
++ / mode_lib->vba.DSCFormatFactor
++ / (1
++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100);
++ }
++ }
++
++ // DSC Delay
++ // TODO
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ double bpp = mode_lib->vba.OutputBpp[k];
++ unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
++
++ if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
++ if (!mode_lib->vba.ODMCombineEnabled[k]) {
++ locals->DSCDelay[k] =
++ dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ bpp,
++ dml_ceil(
++ (double) mode_lib->vba.HActive[k]
++ / mode_lib->vba.NumberOfDSCSlices[k],
++ 1),
++ slices,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(
++ mode_lib->vba.OutputFormat[k]);
++ } else {
++ locals->DSCDelay[k] =
++ 2
++ * (dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ bpp,
++ dml_ceil(
++ (double) mode_lib->vba.HActive[k]
++ / mode_lib->vba.NumberOfDSCSlices[k],
++ 1),
++ slices / 2.0,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(
++ mode_lib->vba.OutputFormat[k]));
++ }
++ locals->DSCDelay[k] = locals->DSCDelay[k]
++ * mode_lib->vba.PixelClock[k]
++ / mode_lib->vba.PixelClockBackEnd[k];
++ } else {
++ locals->DSCDelay[k] = 0;
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
++ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
++ && mode_lib->vba.DSCEnabled[j])
++ locals->DSCDelay[k] = locals->DSCDelay[j];
++
++ // Prefetch
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ unsigned int PDEAndMetaPTEBytesFrameY;
++ unsigned int PixelPTEBytesPerRowY;
++ unsigned int MetaRowByteY;
++ unsigned int MetaRowByteC;
++ unsigned int PDEAndMetaPTEBytesFrameC;
++ unsigned int PixelPTEBytesPerRowC;
++ bool PTEBufferSizeNotExceededY;
++ bool PTEBufferSizeNotExceededC;
++
++ Calculate256BBlockSizes(
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(locals->BytePerPixelDETY[k], 1),
++ dml_ceil(locals->BytePerPixelDETC[k], 2),
++ &locals->BlockHeight256BytesY[k],
++ &locals->BlockHeight256BytesC[k],
++ &locals->BlockWidth256BytesY[k],
++ &locals->BlockWidth256BytesC[k]);
++
++ locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.vtaps[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.ViewportYStartY[k],
++ &locals->VInitPreFillY[k],
++ &locals->MaxNumSwathY[k]);
++
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
++ PDEAndMetaPTEBytesFrameC =
++ CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ locals->BlockHeight256BytesC[k],
++ locals->BlockWidth256BytesC[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(
++ locals->BytePerPixelDETC[k],
++ 2),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k] / 2,
++ mode_lib->vba.ViewportHeight[k] / 2,
++ locals->SwathWidthY[k] / 2,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels,
++ mode_lib->vba.VMMPageSize,
++ mode_lib->vba.PTEBufferSizeInRequestsChroma,
++ mode_lib->vba.PitchC[k],
++ mode_lib->vba.DCCMetaPitchC[k],
++ &locals->MacroTileWidthC[k],
++ &MetaRowByteC,
++ &PixelPTEBytesPerRowC,
++ &PTEBufferSizeNotExceededC,
++ &locals->dpte_row_width_chroma_ub[k],
++ &locals->dpte_row_height_chroma[k],
++ &locals->meta_req_width_chroma[k],
++ &locals->meta_req_height_chroma[k],
++ &locals->meta_row_width_chroma[k],
++ &locals->meta_row_height_chroma[k],
++ &locals->vm_group_bytes_chroma,
++ &locals->dpte_group_bytes_chroma,
++ &locals->PixelPTEReqWidthC[k],
++ &locals->PixelPTEReqHeightC[k],
++ &locals->PTERequestSizeC[k],
++ &locals->dpde0_bytes_per_frame_ub_c[k],
++ &locals->meta_pte_bytes_per_frame_ub_c[k]);
++
++ locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k] / 2,
++ mode_lib->vba.VTAPsChroma[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.SwathHeightC[k],
++ mode_lib->vba.ViewportYStartC[k],
++ &locals->VInitPreFillC[k],
++ &locals->MaxNumSwathC[k]);
++ } else {
++ PixelPTEBytesPerRowC = 0;
++ PDEAndMetaPTEBytesFrameC = 0;
++ MetaRowByteC = 0;
++ locals->MaxNumSwathC[k] = 0;
++ locals->PrefetchSourceLinesC[k] = 0;
++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
++ }
++
++ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ locals->BlockHeight256BytesY[k],
++ locals->BlockWidth256BytesY[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(locals->BytePerPixelDETY[k], 1),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k],
++ mode_lib->vba.ViewportHeight[k],
++ locals->SwathWidthY[k],
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels,
++ mode_lib->vba.VMMPageSize,
++ locals->PTEBufferSizeInRequestsForLuma,
++ mode_lib->vba.PitchY[k],
++ mode_lib->vba.DCCMetaPitchY[k],
++ &locals->MacroTileWidthY[k],
++ &MetaRowByteY,
++ &PixelPTEBytesPerRowY,
++ &PTEBufferSizeNotExceededY,
++ &locals->dpte_row_width_luma_ub[k],
++ &locals->dpte_row_height[k],
++ &locals->meta_req_width[k],
++ &locals->meta_req_height[k],
++ &locals->meta_row_width[k],
++ &locals->meta_row_height[k],
++ &locals->vm_group_bytes[k],
++ &locals->dpte_group_bytes[k],
++ &locals->PixelPTEReqWidthY[k],
++ &locals->PixelPTEReqHeightY[k],
++ &locals->PTERequestSizeY[k],
++ &locals->dpde0_bytes_per_frame_ub_l[k],
++ &locals->meta_pte_bytes_per_frame_ub_l[k]);
++
++ locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
++ locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
++ + PDEAndMetaPTEBytesFrameC;
++ locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
++
++ CalculateActiveRowBandwidth(
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ MetaRowByteY,
++ MetaRowByteC,
++ locals->meta_row_height[k],
++ locals->meta_row_height_chroma[k],
++ PixelPTEBytesPerRowY,
++ PixelPTEBytesPerRowC,
++ locals->dpte_row_height[k],
++ locals->dpte_row_height_chroma[k],
++ &locals->meta_row_bw[k],
++ &locals->dpte_row_bw[k]);
++ }
++
++ mode_lib->vba.TotalDCCActiveDPP = 0;
++ mode_lib->vba.TotalActiveDPP = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
++ + mode_lib->vba.DPPPerPlane[k];
++ if (mode_lib->vba.DCCEnable[k])
++ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
++ + mode_lib->vba.DPPPerPlane[k];
++ }
++
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
++
++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
++ (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
++ + mode_lib->vba.UrgentOutOfOrderReturnPerChannel
++ * mode_lib->vba.NumberOfChannels
++ / mode_lib->vba.ReturnBW;
++
++ mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency(
++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency,
++ mode_lib->vba.TotalActiveDPP,
++ mode_lib->vba.PixelChunkSizeInKByte,
++ mode_lib->vba.TotalDCCActiveDPP,
++ mode_lib->vba.MetaChunkSize,
++ mode_lib->vba.ReturnBW,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.NumberOfActivePlanes,
++ mode_lib->vba.DPPPerPlane,
++ locals->dpte_group_bytes,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels);
++
++
++ mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
++ mode_lib->vba.WritebackLatency
++ + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k])
++ / mode_lib->vba.DISPCLK;
++ } else
++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
++ if (mode_lib->vba.BlendingAndTiming[j] == k
++ && mode_lib->vba.WritebackEnable[j] == true) {
++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
++ dml_max(
++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k],
++ mode_lib->vba.WritebackLatency
++ + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[j],
++ mode_lib->vba.WritebackHRatio[j],
++ mode_lib->vba.WritebackVRatio[j],
++ mode_lib->vba.WritebackLumaHTaps[j],
++ mode_lib->vba.WritebackLumaVTaps[j],
++ mode_lib->vba.WritebackChromaHTaps[j],
++ mode_lib->vba.WritebackChromaVTaps[j],
++ mode_lib->vba.WritebackDestinationWidth[j])
++ / mode_lib->vba.DISPCLK);
++ }
++ }
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
++ if (mode_lib->vba.BlendingAndTiming[k] == j)
++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][j];
++
++ mode_lib->vba.VStartupLines = 13;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]);
++
++ // We don't really care to iterate between the various prefetch modes
++ //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode);
++ mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly);
++
++ do {
++ double MaxTotalRDBandwidth = 0;
++ double MaxTotalRDBandwidthNoUrgentBurst = 0;
++ bool DestinationLineTimesForPrefetchLessThan2 = false;
++ bool VRatioPrefetchMoreThan4 = false;
++ double TWait = CalculateTWait(
++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.SREnterPlusExitTime);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ Pipe myPipe;
++ HostVM myHostVM;
++
++ if (mode_lib->vba.XFCEnabled[k] == true) {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay =
++ CalculateRemoteSurfaceFlipDelay(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ locals->SwathWidthY[k],
++ dml_ceil(
++ locals->BytePerPixelDETY[k],
++ 1),
++ mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.XFCTSlvVupdateOffset,
++ mode_lib->vba.XFCTSlvVupdateWidth,
++ mode_lib->vba.XFCTSlvVreadyOffset,
++ mode_lib->vba.XFCXBUFLatencyTolerance,
++ mode_lib->vba.XFCFillBWOverhead,
++ mode_lib->vba.XFCSlvChunkSize,
++ mode_lib->vba.XFCBusTransportTime,
++ mode_lib->vba.TCalc,
++ TWait,
++ &mode_lib->vba.SrcActiveDrainRate,
++ &mode_lib->vba.TInitXFill,
++ &mode_lib->vba.TslvChk);
++ } else {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
++ }
++
++ myPipe.DPPCLK = locals->DPPCLK[k];
++ myPipe.DISPCLK = mode_lib->vba.DISPCLK;
++ myPipe.PixelClock = mode_lib->vba.PixelClock[k];
++ myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep;
++ myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k];
++ myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
++ myPipe.SourceScan = mode_lib->vba.SourceScan[k];
++ myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
++ myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
++ myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
++ myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
++ myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
++ myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
++ myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
++ myPipe.HTotal = mode_lib->vba.HTotal[k];
++
++
++ myHostVM.Enable = mode_lib->vba.HostVMEnable;
++ myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
++ myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
++
++ mode_lib->vba.ErrorResult[k] =
++ CalculatePrefetchSchedule(
++ mode_lib,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ &myPipe,
++ locals->DSCDelay[k],
++ mode_lib->vba.DPPCLKDelaySubtotal,
++ mode_lib->vba.DPPCLKDelaySCL,
++ mode_lib->vba.DPPCLKDelaySCLLBOnly,
++ mode_lib->vba.DPPCLKDelayCNVCFormater,
++ mode_lib->vba.DPPCLKDelayCNVCCursor,
++ mode_lib->vba.DISPCLKDelaySubtotal,
++ (unsigned int) (locals->SwathWidthY[k]
++ / mode_lib->vba.HRatio[k]),
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.MaxInterDCNTileRepeaters,
++ dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]),
++ locals->MaxVStartupLines[k],
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ &myHostVM,
++ mode_lib->vba.DynamicMetadataEnable[k],
++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
++ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.UrgentExtraLatency,
++ mode_lib->vba.TCalc,
++ locals->PDEAndMetaPTEBytesFrame[k],
++ locals->MetaRowByte[k],
++ locals->PixelPTEBytesPerRow[k],
++ locals->PrefetchSourceLinesY[k],
++ locals->SwathWidthY[k],
++ locals->BytePerPixelDETY[k],
++ locals->VInitPreFillY[k],
++ locals->MaxNumSwathY[k],
++ locals->PrefetchSourceLinesC[k],
++ locals->BytePerPixelDETC[k],
++ locals->VInitPreFillC[k],
++ locals->MaxNumSwathC[k],
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.SwathHeightC[k],
++ TWait,
++ mode_lib->vba.XFCEnabled[k],
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay,
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ &locals->DSTXAfterScaler[k],
++ &locals->DSTYAfterScaler[k],
++ &locals->DestinationLinesForPrefetch[k],
++ &locals->PrefetchBandwidth[k],
++ &locals->DestinationLinesToRequestVMInVBlank[k],
++ &locals->DestinationLinesToRequestRowInVBlank[k],
++ &locals->VRatioPrefetchY[k],
++ &locals->VRatioPrefetchC[k],
++ &locals->RequiredPrefetchPixDataBWLuma[k],
++ &locals->RequiredPrefetchPixDataBWChroma[k],
++ &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
++ &locals->Tno_bw[k],
++ &locals->prefetch_vmrow_bw[k],
++ &locals->swath_width_luma_ub[k],
++ &locals->swath_width_chroma_ub[k],
++ &mode_lib->vba.VUpdateOffsetPix[k],
++ &mode_lib->vba.VUpdateWidthPix[k],
++ &mode_lib->vba.VReadyOffsetPix[k]);
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ locals->VStartup[k] = dml_min(
++ mode_lib->vba.VStartupLines,
++ locals->MaxVStartupLines[k]);
++ if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
++ != 0) {
++ locals->VStartup[k] =
++ locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
++ }
++ } else {
++ locals->VStartup[k] =
++ dml_min(
++ mode_lib->vba.VStartupLines,
++ locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ unsigned int m;
++
++ locals->cursor_bw[k] = 0;
++ locals->cursor_bw_pre[k] = 0;
++ for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
++ locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
++ locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k];
++ }
++
++ CalculateUrgentBurstFactor(
++ mode_lib->vba.DETBufferSizeInKByte,
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.SwathHeightC[k],
++ locals->SwathWidthY[k],
++ mode_lib->vba.HTotal[k] /
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.CursorBufferSize,
++ mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
++ dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
++ mode_lib->vba.VRatio[k],
++ locals->VRatioPrefetchY[k],
++ locals->VRatioPrefetchC[k],
++ locals->BytePerPixelDETY[k],
++ locals->BytePerPixelDETC[k],
++ &locals->UrgentBurstFactorCursor[k],
++ &locals->UrgentBurstFactorCursorPre[k],
++ &locals->UrgentBurstFactorLuma[k],
++ &locals->UrgentBurstFactorLumaPre[k],
++ &locals->UrgentBurstFactorChroma[k],
++ &locals->UrgentBurstFactorChromaPre[k],
++ &locals->NotEnoughUrgentLatencyHiding,
++ &locals->NotEnoughUrgentLatencyHidingPre);
++
++ if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
++ locals->UrgentBurstFactorLuma[k] = 1;
++ locals->UrgentBurstFactorChroma[k] = 1;
++ locals->UrgentBurstFactorCursor[k] = 1;
++ locals->UrgentBurstFactorLumaPre[k] = 1;
++ locals->UrgentBurstFactorChromaPre[k] = 1;
++ locals->UrgentBurstFactorCursorPre[k] = 1;
++ }
++
++ MaxTotalRDBandwidth = MaxTotalRDBandwidth +
++ dml_max3(locals->prefetch_vmrow_bw[k],
++ locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
++ + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k]
++ * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
++ locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k]
++ * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
++
++ MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
++ dml_max3(locals->prefetch_vmrow_bw[k],
++ locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k]
++ + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
++ locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
++
++ if (locals->DestinationLinesForPrefetch[k] < 2)
++ DestinationLineTimesForPrefetchLessThan2 = true;
++ if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4)
++ VRatioPrefetchMoreThan4 = true;
++ }
++ mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
++
++ if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
++ && !DestinationLineTimesForPrefetchLessThan2)
++ mode_lib->vba.PrefetchModeSupported = true;
++ else {
++ mode_lib->vba.PrefetchModeSupported = false;
++ dml_print(
++ "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
++ }
++
++ if (mode_lib->vba.PrefetchModeSupported == true) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip =
++ mode_lib->vba.BandwidthAvailableForImmediateFlip
++ - dml_max(
++ locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
++ + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k]
++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
++ locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] +
++ locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] +
++ locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
++ }
++
++ mode_lib->vba.TotImmediateFlipBytes = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k];
++ }
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ CalculateFlipSchedule(
++ mode_lib,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ mode_lib->vba.UrgentExtraLatency,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ locals->PDEAndMetaPTEBytesFrame[k],
++ locals->MetaRowByte[k],
++ locals->PixelPTEBytesPerRow[k],
++ mode_lib->vba.BandwidthAvailableForImmediateFlip,
++ mode_lib->vba.TotImmediateFlipBytes,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.VRatio[k],
++ locals->Tno_bw[k],
++ mode_lib->vba.DCCEnable[k],
++ locals->dpte_row_height[k],
++ locals->meta_row_height[k],
++ locals->dpte_row_height_chroma[k],
++ locals->meta_row_height_chroma[k],
++ &locals->DestinationLinesToRequestVMInImmediateFlip[k],
++ &locals->DestinationLinesToRequestRowInImmediateFlip[k],
++ &locals->final_flip_bw[k],
++ &locals->ImmediateFlipSupportedForPipe[k]);
++ }
++ mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
++ mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.total_dcn_read_bw_with_flip =
++ mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
++ locals->prefetch_vmrow_bw[k],
++ locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
++ + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
++ locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k]
++ + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k]
++ + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
++ mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst =
++ mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst +
++ dml_max3(locals->prefetch_vmrow_bw[k],
++ locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k],
++ locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
++
++ }
++ mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW;
++
++ mode_lib->vba.ImmediateFlipSupported = true;
++ if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
++ mode_lib->vba.ImmediateFlipSupported = false;
++ }
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (locals->ImmediateFlipSupportedForPipe[k] == false) {
++ mode_lib->vba.ImmediateFlipSupported = false;
++ }
++ }
++ } else {
++ mode_lib->vba.ImmediateFlipSupported = false;
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.ErrorResult[k]) {
++ mode_lib->vba.PrefetchModeSupported = false;
++ dml_print(
++ "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
++ }
++ }
++
++ mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1;
++ } while (!((mode_lib->vba.PrefetchModeSupported
++ && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable)
++ || mode_lib->vba.ImmediateFlipSupported))
++ || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines));
++
++ //Watermarks and NB P-State/DRAM Clock Change Support
++ {
++ enum clock_change_support DRAMClockChangeSupport; // dummy
++ CalculateWatermarksAndDRAMSpeedChangeSupport(
++ mode_lib,
++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
++ mode_lib->vba.NumberOfActivePlanes,
++ mode_lib->vba.MaxLineBufferLines,
++ mode_lib->vba.LineBufferSize,
++ mode_lib->vba.DPPOutputBufferPixels,
++ mode_lib->vba.DETBufferSizeInKByte,
++ mode_lib->vba.WritebackInterfaceLumaBufferSize,
++ mode_lib->vba.WritebackInterfaceChromaBufferSize,
++ mode_lib->vba.DCFCLK,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
++ mode_lib->vba.ReturnBW,
++ mode_lib->vba.GPUVMEnable,
++ locals->dpte_group_bytes,
++ mode_lib->vba.MetaChunkSize,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.UrgentExtraLatency,
++ mode_lib->vba.WritebackLatency,
++ mode_lib->vba.WritebackChunkSize,
++ mode_lib->vba.SOCCLK,
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.SRExitTime,
++ mode_lib->vba.SREnterPlusExitTime,
++ mode_lib->vba.DCFCLKDeepSleep,
++ mode_lib->vba.DPPPerPlane,
++ mode_lib->vba.DCCEnable,
++ locals->DPPCLK,
++ locals->SwathWidthSingleDPPY,
++ mode_lib->vba.SwathHeightY,
++ locals->ReadBandwidthPlaneLuma,
++ mode_lib->vba.SwathHeightC,
++ locals->ReadBandwidthPlaneChroma,
++ mode_lib->vba.LBBitPerPixel,
++ locals->SwathWidthY,
++ mode_lib->vba.HRatio,
++ mode_lib->vba.vtaps,
++ mode_lib->vba.VTAPsChroma,
++ mode_lib->vba.VRatio,
++ mode_lib->vba.HTotal,
++ mode_lib->vba.PixelClock,
++ mode_lib->vba.BlendingAndTiming,
++ locals->BytePerPixelDETY,
++ locals->BytePerPixelDETC,
++ mode_lib->vba.WritebackEnable,
++ mode_lib->vba.WritebackPixelFormat,
++ mode_lib->vba.WritebackDestinationWidth,
++ mode_lib->vba.WritebackDestinationHeight,
++ mode_lib->vba.WritebackSourceHeight,
++ &DRAMClockChangeSupport,
++ &mode_lib->vba.UrgentWatermark,
++ &mode_lib->vba.WritebackUrgentWatermark,
++ &mode_lib->vba.DRAMClockChangeWatermark,
++ &mode_lib->vba.WritebackDRAMClockChangeWatermark,
++ &mode_lib->vba.StutterExitWatermark,
++ &mode_lib->vba.StutterEnterPlusExitWatermark,
++ &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
++ }
++
++
++ //Display Pipeline Delivery Time in Prefetch, Groups
++ CalculatePixelDeliveryTimes(
++ mode_lib->vba.NumberOfActivePlanes,
++ mode_lib->vba.VRatio,
++ locals->VRatioPrefetchY,
++ locals->VRatioPrefetchC,
++ locals->swath_width_luma_ub,
++ locals->swath_width_chroma_ub,
++ mode_lib->vba.DPPPerPlane,
++ mode_lib->vba.HRatio,
++ mode_lib->vba.PixelClock,
++ locals->PSCL_THROUGHPUT_LUMA,
++ locals->PSCL_THROUGHPUT_CHROMA,
++ locals->DPPCLK,
++ locals->BytePerPixelDETC,
++ mode_lib->vba.SourceScan,
++ locals->BlockWidth256BytesY,
++ locals->BlockHeight256BytesY,
++ locals->BlockWidth256BytesC,
++ locals->BlockHeight256BytesC,
++ locals->DisplayPipeLineDeliveryTimeLuma,
++ locals->DisplayPipeLineDeliveryTimeChroma,
++ locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
++ locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
++ locals->DisplayPipeRequestDeliveryTimeLuma,
++ locals->DisplayPipeRequestDeliveryTimeChroma,
++ locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
++ locals->DisplayPipeRequestDeliveryTimeChromaPrefetch);
++
++ CalculateMetaAndPTETimes(
++ mode_lib->vba.NumberOfActivePlanes,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.MetaChunkSize,
++ mode_lib->vba.MinMetaChunkSizeBytes,
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.HTotal,
++ mode_lib->vba.VRatio,
++ locals->VRatioPrefetchY,
++ locals->VRatioPrefetchC,
++ locals->DestinationLinesToRequestRowInVBlank,
++ locals->DestinationLinesToRequestRowInImmediateFlip,
++ locals->DestinationLinesToRequestVMInVBlank,
++ locals->DestinationLinesToRequestVMInImmediateFlip,
++ mode_lib->vba.DCCEnable,
++ mode_lib->vba.PixelClock,
++ locals->BytePerPixelDETY,
++ locals->BytePerPixelDETC,
++ mode_lib->vba.SourceScan,
++ locals->dpte_row_height,
++ locals->dpte_row_height_chroma,
++ locals->meta_row_width,
++ locals->meta_row_height,
++ locals->meta_req_width,
++ locals->meta_req_height,
++ locals->dpte_group_bytes,
++ locals->PTERequestSizeY,
++ locals->PTERequestSizeC,
++ locals->PixelPTEReqWidthY,
++ locals->PixelPTEReqHeightY,
++ locals->PixelPTEReqWidthC,
++ locals->PixelPTEReqHeightC,
++ locals->dpte_row_width_luma_ub,
++ locals->dpte_row_width_chroma_ub,
++ locals->vm_group_bytes,
++ locals->dpde0_bytes_per_frame_ub_l,
++ locals->dpde0_bytes_per_frame_ub_c,
++ locals->meta_pte_bytes_per_frame_ub_l,
++ locals->meta_pte_bytes_per_frame_ub_c,
++ locals->DST_Y_PER_PTE_ROW_NOM_L,
++ locals->DST_Y_PER_PTE_ROW_NOM_C,
++ locals->DST_Y_PER_META_ROW_NOM_L,
++ locals->TimePerMetaChunkNominal,
++ locals->TimePerMetaChunkVBlank,
++ locals->TimePerMetaChunkFlip,
++ locals->time_per_pte_group_nom_luma,
++ locals->time_per_pte_group_vblank_luma,
++ locals->time_per_pte_group_flip_luma,
++ locals->time_per_pte_group_nom_chroma,
++ locals->time_per_pte_group_vblank_chroma,
++ locals->time_per_pte_group_flip_chroma,
++ locals->TimePerVMGroupVBlank,
++ locals->TimePerVMGroupFlip,
++ locals->TimePerVMRequestVBlank,
++ locals->TimePerVMRequestFlip);
++
++
++ // Min TTUVBlank
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
++ locals->AllowDRAMClockChangeDuringVBlank[k] = true;
++ locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
++ locals->MinTTUVBlank[k] = dml_max(
++ mode_lib->vba.DRAMClockChangeWatermark,
++ dml_max(
++ mode_lib->vba.StutterEnterPlusExitWatermark,
++ mode_lib->vba.UrgentWatermark));
++ } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) {
++ locals->AllowDRAMClockChangeDuringVBlank[k] = false;
++ locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
++ locals->MinTTUVBlank[k] = dml_max(
++ mode_lib->vba.StutterEnterPlusExitWatermark,
++ mode_lib->vba.UrgentWatermark);
++ } else {
++ locals->AllowDRAMClockChangeDuringVBlank[k] = false;
++ locals->AllowDRAMSelfRefreshDuringVBlank[k] = false;
++ locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark;
++ }
++ if (!mode_lib->vba.DynamicMetadataEnable[k])
++ locals->MinTTUVBlank[k] = mode_lib->vba.TCalc
++ + locals->MinTTUVBlank[k];
++ }
++
++ // DCC Configuration
++ mode_lib->vba.ActiveDPPs = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration(
++ mode_lib->vba.DCCEnable[k],
++ false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
++ mode_lib->vba.ViewportWidth[k],
++ mode_lib->vba.ViewportHeight[k],
++ mode_lib->vba.DETBufferSizeInKByte * 1024,
++ locals->BlockHeight256BytesY[k],
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.SurfaceTiling[k],
++ locals->BytePerPixelDETY[k],
++ mode_lib->vba.SourceScan[k],
++ &locals->DCCYMaxUncompressedBlock[k],
++ &locals->DCCYMaxCompressedBlock[k],
++ &locals->DCCYIndependent64ByteBlock[k]);
++ }
++
++ //XFC Parameters:
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.XFCEnabled[k] == true) {
++ double TWait;
++
++ locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset;
++ locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth;
++ locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset;
++ TWait = CalculateTWait(
++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.SREnterPlusExitTime);
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ locals->SwathWidthY[k],
++ dml_ceil(locals->BytePerPixelDETY[k], 1),
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.XFCTSlvVupdateOffset,
++ mode_lib->vba.XFCTSlvVupdateWidth,
++ mode_lib->vba.XFCTSlvVreadyOffset,
++ mode_lib->vba.XFCXBUFLatencyTolerance,
++ mode_lib->vba.XFCFillBWOverhead,
++ mode_lib->vba.XFCSlvChunkSize,
++ mode_lib->vba.XFCBusTransportTime,
++ mode_lib->vba.TCalc,
++ TWait,
++ &mode_lib->vba.SrcActiveDrainRate,
++ &mode_lib->vba.TInitXFill,
++ &mode_lib->vba.TslvChk);
++ locals->XFCRemoteSurfaceFlipLatency[k] =
++ dml_floor(
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1);
++ locals->XFCTransferDelay[k] =
++ dml_ceil(
++ mode_lib->vba.XFCBusTransportTime
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1);
++ locals->XFCPrechargeDelay[k] =
++ dml_ceil(
++ (mode_lib->vba.XFCBusTransportTime
++ + mode_lib->vba.TInitXFill
++ + mode_lib->vba.TslvChk)
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1);
++ mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance
++ * mode_lib->vba.SrcActiveDrainRate;
++ mode_lib->vba.FinalFillMargin =
++ (locals->DestinationLinesToRequestVMInVBlank[k]
++ + locals->DestinationLinesToRequestRowInVBlank[k])
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]
++ * mode_lib->vba.SrcActiveDrainRate
++ + mode_lib->vba.XFCFillConstant;
++ mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay
++ * mode_lib->vba.SrcActiveDrainRate
++ + mode_lib->vba.FinalFillMargin;
++ mode_lib->vba.RemainingFillLevel = dml_max(
++ 0.0,
++ mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel);
++ mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel
++ / (mode_lib->vba.SrcActiveDrainRate
++ * mode_lib->vba.XFCFillBWOverhead / 100);
++ locals->XFCPrefetchMargin[k] =
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay
++ + mode_lib->vba.TFinalxFill
++ + (locals->DestinationLinesToRequestVMInVBlank[k]
++ + locals->DestinationLinesToRequestRowInVBlank[k])
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ } else {
++ locals->XFCSlaveVUpdateOffset[k] = 0;
++ locals->XFCSlaveVupdateWidth[k] = 0;
++ locals->XFCSlaveVReadyOffset[k] = 0;
++ locals->XFCRemoteSurfaceFlipLatency[k] = 0;
++ locals->XFCPrechargeDelay[k] = 0;
++ locals->XFCTransferDelay[k] = 0;
++ locals->XFCPrefetchMargin[k] = 0;
++ }
++ }
++
++ // Stutter Efficiency
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ CalculateDETBufferSize(
++ mode_lib->vba.DETBufferSizeInKByte,
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.SwathHeightC[k],
++ &locals->DETBufferSizeY[k],
++ &locals->DETBufferSizeC[k]);
++
++ locals->LinesInDETY[k] = locals->DETBufferSizeY[k]
++ / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k];
++ locals->LinesInDETYRoundedDownToSwath[k] = dml_floor(
++ locals->LinesInDETY[k],
++ mode_lib->vba.SwathHeightY[k]);
++ locals->FullDETBufferingTimeY[k] =
++ locals->LinesInDETYRoundedDownToSwath[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k])
++ / mode_lib->vba.VRatio[k];
++ }
++
++ mode_lib->vba.StutterPeriod = 999999.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) {
++ mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k];
++ mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
++ (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1);
++ locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k];
++ locals->LinesToFinishSwathTransferStutterCriticalPlane =
++ mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]);
++ }
++ }
++
++ mode_lib->vba.AverageReadBandwidth = 0.0;
++ mode_lib->vba.TotalRowReadBandwidth = 0.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ unsigned int DCCRateLimit;
++
++ if (mode_lib->vba.DCCEnable[k]) {
++ if (locals->DCCYMaxCompressedBlock[k] == 256)
++ DCCRateLimit = 4;
++ else
++ DCCRateLimit = 2;
++
++ mode_lib->vba.AverageReadBandwidth =
++ mode_lib->vba.AverageReadBandwidth
++ + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) /
++ dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit);
++ } else {
++ mode_lib->vba.AverageReadBandwidth =
++ mode_lib->vba.AverageReadBandwidth
++ + locals->ReadBandwidthPlaneLuma[k]
++ + locals->ReadBandwidthPlaneChroma[k];
++ }
++ mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth +
++ locals->meta_row_bw[k] + locals->dpte_row_bw[k];
++ }
++
++ mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth;
++
++ mode_lib->vba.PartOfBurstThatFitsInROB =
++ dml_min(
++ mode_lib->vba.StutterPeriod
++ * mode_lib->vba.TotalDataReadBandwidth,
++ mode_lib->vba.ROBBufferSizeInKByte * 1024
++ * mode_lib->vba.AverageDCCCompressionRate);
++ mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
++ / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW
++ + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth
++ - mode_lib->vba.PartOfBurstThatFitsInROB)
++ / (mode_lib->vba.DCFCLK * 64)
++ + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW;
++ mode_lib->vba.StutterBurstTime = dml_max(
++ mode_lib->vba.StutterBurstTime,
++ (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane *
++ locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW)
++ );
++
++ mode_lib->vba.TotalActiveWriteback = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
++ }
++ }
++
++ if (mode_lib->vba.TotalActiveWriteback == 0) {
++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
++ - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
++ / mode_lib->vba.StutterPeriod) * 100;
++ } else {
++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
++ }
++
++ mode_lib->vba.SmallestVBlank = 999999;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
++ mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
++ - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ } else {
++ mode_lib->vba.VBlankTime = 0;
++ }
++ mode_lib->vba.SmallestVBlank = dml_min(
++ mode_lib->vba.SmallestVBlank,
++ mode_lib->vba.VBlankTime);
++ }
++
++ mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
++ * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
++ - mode_lib->vba.SmallestVBlank)
++ + mode_lib->vba.SmallestVBlank)
++ / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
++}
++
++static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
++{
++ /*
++  * Display Pipe Configuration
++  *
++  * For each active plane, choose the swath heights and DET buffer split:
++  *  1. derive the DET bytes per pixel and the 256-byte block height/width
++  *     from the source pixel format and the surface tiling,
++  *  2. derive the maximum and minimum swath heights from the scan
++  *     direction, tiling and format,
++  *  3. compute the swath width from the viewport, then either cap it for
++  *     ODM combine or divide it by DPPPerPlane,
++  *  4. use the maximum swath heights when the rounded-up luma + chroma
++  *     swath bytes fit in half of the DET buffer, otherwise the minimum,
++  *  5. store SwathHeightY/C and call CalculateDETBufferSize() to fill
++  *     DETBufferSizeY/C.
++  */
++ double BytePerPixDETY;
++ double BytePerPixDETC;
++ double Read256BytesBlockHeightY;
++ double Read256BytesBlockHeightC;
++ double Read256BytesBlockWidthY;
++ double Read256BytesBlockWidthC;
++ double MaximumSwathHeightY;
++ double MaximumSwathHeightC;
++ double MinimumSwathHeightY;
++ double MinimumSwathHeightC;
++ double SwathWidth;
++ double SwathWidthGranularityY;
++ double SwathWidthGranularityC;
++ double RoundedUpMaxSwathSizeBytesY;
++ double RoundedUpMaxSwathSizeBytesC;
++ unsigned int j, k;
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ bool MainPlaneDoesODMCombine = false; /* set below if this plane, or the plane named by BlendingAndTiming[k], has ODM combine enabled */
++
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
++ BytePerPixDETY = 8;
++ BytePerPixDETC = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
++ BytePerPixDETY = 4;
++ BytePerPixDETC = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
++ BytePerPixDETY = 2;
++ BytePerPixDETC = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
++ BytePerPixDETY = 1;
++ BytePerPixDETC = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
++ BytePerPixDETY = 1;
++ BytePerPixDETC = 2;
++ } else { /* every format not listed above; NOTE(review): presumably dm_420_10 - confirm */
++ BytePerPixDETY = 4.0 / 3.0;
++ BytePerPixDETC = 8.0 / 3.0;
++ }
++
++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ Read256BytesBlockHeightY = 1;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
++ Read256BytesBlockHeightY = 4;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
++ Read256BytesBlockHeightY = 8;
++ } else {
++ Read256BytesBlockHeightY = 16;
++ }
++ Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
++ / Read256BytesBlockHeightY;
++ Read256BytesBlockHeightC = 0; /* these formats have BytePerPixDETC == 0, so the chroma block is zeroed */
++ Read256BytesBlockWidthC = 0;
++ } else {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ Read256BytesBlockHeightY = 1;
++ Read256BytesBlockHeightC = 1;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
++ Read256BytesBlockHeightY = 16;
++ Read256BytesBlockHeightC = 8;
++ } else {
++ Read256BytesBlockHeightY = 8;
++ Read256BytesBlockHeightC = 8;
++ }
++ Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
++ / Read256BytesBlockHeightY;
++ Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2)
++ / Read256BytesBlockHeightC;
++ }
++
++ if (mode_lib->vba.SourceScan[k] == dm_horz) {
++ MaximumSwathHeightY = Read256BytesBlockHeightY;
++ MaximumSwathHeightC = Read256BytesBlockHeightC;
++ } else { /* any other scan direction: the block width is used as the swath height */
++ MaximumSwathHeightY = Read256BytesBlockWidthY;
++ MaximumSwathHeightC = Read256BytesBlockWidthC;
++ }
++
++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
++ || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ && (mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_4kb_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_4kb_s_x
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s_t
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s_x
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_var_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_var_s_x)
++ && mode_lib->vba.SourceScan[k] == dm_horz)) {
++ MinimumSwathHeightY = MaximumSwathHeightY;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8
++ && mode_lib->vba.SourceScan[k] != dm_horz) {
++ MinimumSwathHeightY = MaximumSwathHeightY;
++ } else {
++ MinimumSwathHeightY = MaximumSwathHeightY / 2.0; /* remaining 444 cases may fall back to half the maximum luma swath height */
++ }
++ MinimumSwathHeightC = MaximumSwathHeightC;
++ } else {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ MinimumSwathHeightY = MaximumSwathHeightY;
++ MinimumSwathHeightC = MaximumSwathHeightC;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
++ && mode_lib->vba.SourceScan[k] == dm_horz) {
++ MinimumSwathHeightY = MaximumSwathHeightY / 2.0; /* dm_420_8 horizontal: only luma may be halved */
++ MinimumSwathHeightC = MaximumSwathHeightC;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
++ && mode_lib->vba.SourceScan[k] == dm_horz) {
++ MinimumSwathHeightC = MaximumSwathHeightC / 2.0; /* dm_420_10 horizontal: only chroma may be halved */
++ MinimumSwathHeightY = MaximumSwathHeightY;
++ } else {
++ MinimumSwathHeightY = MaximumSwathHeightY;
++ MinimumSwathHeightC = MaximumSwathHeightC;
++ }
++ }
++
++ if (mode_lib->vba.SourceScan[k] == dm_horz) {
++ SwathWidth = mode_lib->vba.ViewportWidth[k];
++ } else {
++ SwathWidth = mode_lib->vba.ViewportHeight[k];
++ }
++
++ if (mode_lib->vba.ODMCombineEnabled[k] == true) {
++ MainPlaneDoesODMCombine = true;
++ }
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
++ if (mode_lib->vba.BlendingAndTiming[k] == j
++ && mode_lib->vba.ODMCombineEnabled[j] == true) {
++ MainPlaneDoesODMCombine = true;
++ }
++ }
++
++ if (MainPlaneDoesODMCombine == true) {
++ SwathWidth = dml_min(
++ SwathWidth,
++ mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); /* ODM combine: cap at half of HActive scaled by HRatio */
++ } else {
++ SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k];
++ }
++
++ SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY;
++ RoundedUpMaxSwathSizeBytesY = (dml_ceil(
++ (double) (SwathWidth - 1),
++ SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY
++ * MaximumSwathHeightY;
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { /* dm_420_10 only: dml_ceil() with granularity 256, plus another 256 bytes */
++ RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256)
++ + 256;
++ }
++ if (MaximumSwathHeightC > 0) {
++ SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2)
++ / MaximumSwathHeightC;
++ RoundedUpMaxSwathSizeBytesC = (dml_ceil(
++ (double) (SwathWidth / 2.0 - 1),
++ SwathWidthGranularityC) + SwathWidthGranularityC)
++ * BytePerPixDETC * MaximumSwathHeightC;
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
++ RoundedUpMaxSwathSizeBytesC = dml_ceil(
++ RoundedUpMaxSwathSizeBytesC,
++ 256) + 256;
++ }
++ } else /* zero chroma swath height: no chroma bytes to account for */
++ RoundedUpMaxSwathSizeBytesC = 0.0;
++
++ if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
++ <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { /* maximum swaths fit in half of the DET buffer */
++ mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
++ mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
++ } else {
++ mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY;
++ mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC;
++ }
++
++ CalculateDETBufferSize(
++ mode_lib->vba.DETBufferSizeInKByte,
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.SwathHeightC[k],
++ &mode_lib->vba.DETBufferSizeY[k],
++ &mode_lib->vba.DETBufferSizeC[k]);
++ }
++}
++
++/*
++ * CalculateTWait - select the wait time (TWait) for a prefetch mode.
++ *
++ * PrefetchMode 0: largest of (DRAMClockChangeLatency + UrgentLatency),
++ *                 SREnterPlusExitTime and UrgentLatency.
++ * PrefetchMode 1: larger of SREnterPlusExitTime and UrgentLatency.
++ * any other mode: UrgentLatency alone.
++ */
++static double CalculateTWait(
++		unsigned int PrefetchMode,
++		double DRAMClockChangeLatency,
++		double UrgentLatency,
++		double SREnterPlusExitTime)
++{
++	double SelfRefreshOrUrgent;
++
++	/* Modes other than 0 and 1 ignore everything but the urgent latency. */
++	if (PrefetchMode > 1)
++		return UrgentLatency;
++
++	SelfRefreshOrUrgent = dml_max(SREnterPlusExitTime, UrgentLatency);
++	if (PrefetchMode == 1)
++		return SelfRefreshOrUrgent;
++
++	/* Mode 0 additionally considers the DRAM clock change latency. */
++	return dml_max(DRAMClockChangeLatency + UrgentLatency, SelfRefreshOrUrgent);
++}
++
++/*
++ * CalculateRemoteSurfaceFlipDelay - compute the XFC remote surface flip delay.
++ *
++ * Three intermediate results are written through the out-pointers:
++ *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
++ *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
++ *   *TslvChk            = XFCSlvChunkSize
++ *                         / (*SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100))
++ *
++ * Returns 2 * XFCBusTransportTime + slave setup time (the sum of the three
++ * XFCTSlv* inputs) + TCalc + TWait + *TslvChk + *TInitXFill.
++ *
++ * Each intermediate value is reported through dml_print(). mode_lib is not
++ * referenced explicitly in this function.
++ * NOTE(review): presumably dml_print() relies on mode_lib being in scope -
++ * confirm against its definition before dropping the parameter.
++ */
++static double CalculateRemoteSurfaceFlipDelay(
++		struct display_mode_lib *mode_lib,
++		double VRatio,
++		double SwathWidth,
++		double Bpp,
++		double LineTime,
++		double XFCTSlvVupdateOffset,
++		double XFCTSlvVupdateWidth,
++		double XFCTSlvVreadyOffset,
++		double XFCXBUFLatencyTolerance,
++		double XFCFillBWOverhead,
++		double XFCSlvChunkSize,
++		double XFCBusTransportTime,
++		double TCalc,
++		double TWait,
++		double *SrcActiveDrainRate,
++		double *TInitXFill,
++		double *TslvChk)
++{
++	/* XFCFillBWOverhead is applied as a percentage on top of 1. */
++	double OverheadScale = 1 + XFCFillBWOverhead / 100;
++	double SlaveSetupTime;
++	double FillRate;
++	double FlipDelay;
++
++	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
++	SlaveSetupTime = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset;
++	*TInitXFill = XFCXBUFLatencyTolerance / OverheadScale;
++	FillRate = *SrcActiveDrainRate * OverheadScale;
++	*TslvChk = XFCSlvChunkSize / FillRate;
++
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", *SrcActiveDrainRate);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", FillRate);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);
++
++	// TODO: This doesn't seem to match programming guide
++	FlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait + *TslvChk + *TInitXFill;
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", FlipDelay);
++
++	return FlipDelay;
++}
++
++/*
++ * CalculateWriteBackDelay - writeback delay derived from scaler taps and ratios.
++ *
++ * The luma delay is the larger of a horizontal term (luma H taps / H ratio)
++ * and a vertical term (luma V taps, V ratio and destination width). For every
++ * pixel format other than dm_444_32 the same pair of terms is evaluated for
++ * chroma, using doubled ratios and a destination width divided by four, and
++ * the overall maximum is returned.
++ */
++static double CalculateWriteBackDelay(
++		enum source_format_class WritebackPixelFormat,
++		double WritebackHRatio,
++		double WritebackVRatio,
++		unsigned int WritebackLumaHTaps,
++		unsigned int WritebackLumaVTaps,
++		unsigned int WritebackChromaHTaps,
++		unsigned int WritebackChromaVTaps,
++		unsigned int WritebackDestinationWidth)
++{
++	double LumaVStep = dml_ceil(1.0 / WritebackVRatio, 1);
++	double LumaHDelay = dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio;
++	double LumaVDelay = WritebackLumaVTaps * LumaVStep
++			* dml_ceil(WritebackDestinationWidth / 4.0, 1)
++			+ LumaVStep * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4);
++	double Delay = dml_max(LumaHDelay, LumaVDelay);
++	double ChromaVStep;
++	double ChromaHDelay;
++	double ChromaVDelay;
++
++	/* dm_444_32 is the only format without a separate chroma term. */
++	if (WritebackPixelFormat == dm_444_32)
++		return Delay;
++
++	ChromaVStep = dml_ceil(1 / (2 * WritebackVRatio), 1);
++	ChromaHDelay = dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * WritebackHRatio);
++	ChromaVDelay = WritebackChromaVTaps * ChromaVStep
++			* dml_ceil(WritebackDestinationWidth / 2.0 / 2.0, 1)
++			+ ChromaVStep * (dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4);
++
++	return dml_max(Delay, dml_max(ChromaHDelay, ChromaVDelay));
++}
++
++/*
++ * CalculateActiveRowBandwidth - per-plane meta row and dpte row bandwidth.
++ *
++ * *meta_row_bw is 0 unless DCCEnable is set; *dpte_row_bw is 0 unless
++ * GPUVMEnable is set. Otherwise each is
++ *   VRatio * <row bytes, luma> / (<row height, luma> * LineTime)
++ * and, for dm_420_8 / dm_420_10 only, plus
++ *   VRatio / 2 * <row bytes, chroma> / (<row height, chroma> * LineTime).
++ */
++static void CalculateActiveRowBandwidth(
++		bool GPUVMEnable,
++		enum source_format_class SourcePixelFormat,
++		double VRatio,
++		bool DCCEnable,
++		double LineTime,
++		unsigned int MetaRowByteLuma,
++		unsigned int MetaRowByteChroma,
++		unsigned int meta_row_height_luma,
++		unsigned int meta_row_height_chroma,
++		unsigned int PixelPTEBytesPerRowLuma,
++		unsigned int PixelPTEBytesPerRowChroma,
++		unsigned int dpte_row_height_luma,
++		unsigned int dpte_row_height_chroma,
++		double *meta_row_bw,
++		double *dpte_row_bw)
++{
++	/* Only the two 4:2:0 formats contribute a chroma term. */
++	bool HasChromaRows = (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10);
++	double LumaBW;
++
++	if (DCCEnable == true) {
++		LumaBW = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
++		if (HasChromaRows)
++			*meta_row_bw = LumaBW + VRatio / 2 * MetaRowByteChroma
++					/ (meta_row_height_chroma * LineTime);
++		else
++			*meta_row_bw = LumaBW;
++	} else {
++		*meta_row_bw = 0;
++	}
++
++	if (GPUVMEnable == true) {
++		LumaBW = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
++		if (HasChromaRows)
++			*dpte_row_bw = LumaBW + VRatio / 2 * PixelPTEBytesPerRowChroma
++					/ (dpte_row_height_chroma * LineTime);
++		else
++			*dpte_row_bw = LumaBW;
++	} else {
++		*dpte_row_bw = 0;
++	}
++}
++
++/*
++ * CalculateFlipSchedule - decide whether one pipe can do an immediate flip.
++ *
++ * The bandwidth used for this pipe is BandwidthAvailableForImmediateFlip
++ * scaled by (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
++ * over TotImmediateFlipBytes.
++ *
++ * Outputs:
++ *   *DestinationLinesToRequestVMInImmediateFlip
++ *       VM fetch time in lines, quantised as dml_ceil(4 * lines, 1) / 4
++ *       (fetch time is 0 when GPUVMEnable is false).
++ *   *DestinationLinesToRequestRowInImmediateFlip
++ *       row (meta + dpte) fetch time in lines, quantised the same way
++ *       (fetch time is 0 when neither GPUVMEnable nor DCCEnable is set).
++ *   *final_flip_bw
++ *       larger of the VM byte rate and the row byte rate over those line
++ *       counts.
++ *   *ImmediateFlipSupportedForPipe
++ *       false when the VM line count is >= 32, the row line count is >= 16,
++ *       or VM time + 2 * row time exceeds the shortest row time; true
++ *       otherwise.
++ *
++ * When both GPUVMEnable and HostVMEnable are set, byte counts are scaled by
++ * the ratio of the two PercentOfIdeal... inputs and the urgent-latency terms
++ * by the number of uncached host VM page table levels.
++ *
++ * NOTE(review): there is no guard against TotImmediateFlipBytes == 0, and
++ * with GPUVMEnable false the VM line count is 0, so the first operand of the
++ * *final_flip_bw maximum divides by zero. Both are floating-point divisions,
++ * so the result is inf/NaN rather than a trap - confirm callers tolerate it.
++ */
++static void CalculateFlipSchedule(
++		struct display_mode_lib *mode_lib,
++		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++		double UrgentExtraLatency,
++		double UrgentLatency,
++		unsigned int GPUVMMaxPageTableLevels,
++		bool HostVMEnable,
++		unsigned int HostVMMaxPageTableLevels,
++		unsigned int HostVMCachedPageTableLevels,
++		bool GPUVMEnable,
++		double PDEAndMetaPTEBytesPerFrame,
++		double MetaRowBytes,
++		double DPTEBytesPerRow,
++		double BandwidthAvailableForImmediateFlip,
++		unsigned int TotImmediateFlipBytes,
++		enum source_format_class SourcePixelFormat,
++		double LineTime,
++		double VRatio,
++		double Tno_bw,
++		bool DCCEnable,
++		unsigned int dpte_row_height,
++		unsigned int meta_row_height,
++		unsigned int dpte_row_height_chroma,
++		unsigned int meta_row_height_chroma,
++		double *DestinationLinesToRequestVMInImmediateFlip,
++		double *DestinationLinesToRequestRowInImmediateFlip,
++		double *final_flip_bw,
++		bool *ImmediateFlipSupportedForPipe)
++{
++	bool Is420 = (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10);
++	double VMInefficiency = 1;
++	unsigned int DynamicVMLevels = 0;
++	double FlipBW;
++	double VMFetchTime = 0;
++	double RowFetchTime = 0;
++	double ShortestRowTime = 0.0;
++
++	/* Host VM only matters when it sits underneath an enabled GPU VM. */
++	if (GPUVMEnable == true && HostVMEnable == true) {
++		VMInefficiency =
++			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
++			/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
++		DynamicVMLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
++	}
++
++	FlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
++			* BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
++
++	if (GPUVMEnable == true) {
++		VMFetchTime = dml_max3(
++				Tno_bw + PDEAndMetaPTEBytesPerFrame * VMInefficiency / FlipBW,
++				UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (DynamicVMLevels + 1) - 1),
++				LineTime / 4.0);
++	}
++	*DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (VMFetchTime / LineTime), 1) / 4.0;
++
++	if (GPUVMEnable == true || DCCEnable == true) {
++		RowFetchTime = dml_max3(
++				(MetaRowBytes + DPTEBytesPerRow) * VMInefficiency / FlipBW,
++				UrgentLatency * (DynamicVMLevels + 1),
++				LineTime / 4);
++	}
++	*DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (RowFetchTime / LineTime), 1) / 4.0;
++
++	*final_flip_bw = dml_max(
++			PDEAndMetaPTEBytesPerFrame * VMInefficiency
++				/ (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
++			(MetaRowBytes + DPTEBytesPerRow) * VMInefficiency
++				/ (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
++
++	/*
++	 * Shortest time one row lasts on screen. Which row heights take part
++	 * depends on which of GPUVM / DCC is active; 4:2:0 formats also take
++	 * their chroma rows (at half the vertical ratio) into account.
++	 */
++	if (GPUVMEnable == true && DCCEnable != true) {
++		if (Is420)
++			ShortestRowTime = dml_min(
++					dpte_row_height * LineTime / VRatio,
++					dpte_row_height_chroma * LineTime / (VRatio / 2));
++		else
++			ShortestRowTime = dpte_row_height * LineTime / VRatio;
++	} else if (GPUVMEnable != true && DCCEnable == true) {
++		if (Is420)
++			ShortestRowTime = dml_min(
++					meta_row_height * LineTime / VRatio,
++					meta_row_height_chroma * LineTime / (VRatio / 2));
++		else
++			ShortestRowTime = meta_row_height * LineTime / VRatio;
++	} else if (Is420) {
++		ShortestRowTime = dml_min4(
++				dpte_row_height * LineTime / VRatio,
++				meta_row_height * LineTime / VRatio,
++				dpte_row_height_chroma * LineTime / (VRatio / 2),
++				meta_row_height_chroma * LineTime / (VRatio / 2));
++	} else {
++		ShortestRowTime = dml_min(
++				dpte_row_height * LineTime / VRatio,
++				meta_row_height * LineTime / VRatio);
++	}
++
++	/* Supported only if none of the three limits is exceeded. */
++	*ImmediateFlipSupportedForPipe =
++			!(*DestinationLinesToRequestVMInImmediateFlip >= 32
++			|| *DestinationLinesToRequestRowInImmediateFlip >= 16
++			|| VMFetchTime + 2 * RowFetchTime > ShortestRowTime);
++}
++
++/*
++ * TruncToValidBPP - map an available bits-per-pixel budget to a valid value.
++ *
++ * DecimalBPP is the budget; DesiredBPP is either 0 ("pick the best") or a
++ * specific value that must be matched. Returns BPP_INVALID when nothing fits.
++ *
++ * Non-HDMI output with DSCEnabled:
++ *   the valid range is [Min, Max] with Min = 6 / 7 / 8 and
++ *   Max = 1.5 / 2 / 3 * DSCInputBitPerComponent - 1/16 for dm_420 / dm_n422 /
++ *   every other format. With DesiredBPP == 0 the result is Max when the
++ *   budget reaches it, else the budget floored to a 1/16 step; otherwise
++ *   DesiredBPP is returned if it lies in range and within the budget.
++ *
++ * All other cases (HDMI regardless of DSCEnabled, or DSC disabled):
++ *   the highest entry of a fixed candidate list that the budget covers and
++ *   that DesiredBPP allows (0 or equal) is returned:
++ *     dm_420:                         18, 15, 12
++ *     HDMI dm_444:                    36, 30, 24, 18
++ *     HDMI, any other format:         24, 20, 16 (budget is DecimalBPP / 1.5)
++ *     non-HDMI dm_s422 / dm_n422:     24, 20, 16
++ *     non-HDMI, any other format:     36, 30, 24
++ *
++ * NOTE(review): the DSC branch produces fractional values, but the
++ * unsigned int return type truncates them on return - confirm that callers
++ * do not expect the 1/16 precision.
++ */
++static unsigned int TruncToValidBPP(
++		double DecimalBPP,
++		double DesiredBPP,
++		bool DSCEnabled,
++		enum output_encoder_class Output,
++		enum output_format_class Format,
++		unsigned int DSCInputBitPerComponent)
++{
++	static const unsigned int Steps420[] = { 18, 15, 12 };
++	static const unsigned int Steps422[] = { 24, 20, 16 };
++	static const unsigned int Steps444[] = { 36, 30, 24, 18 };
++	const unsigned int *Steps;
++	unsigned int NumSteps;
++	unsigned int i;
++	double Budget = DecimalBPP;
++
++	if (Output != dm_hdmi && DSCEnabled) {
++		double MinBPP;
++		double MaxBPP;
++
++		if (Format == dm_420) {
++			MinBPP = 6;
++			MaxBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16.0;
++		} else if (Format == dm_n422) {
++			MinBPP = 7;
++			MaxBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
++		} else {
++			MinBPP = 8;
++			MaxBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
++		}
++
++		if (DesiredBPP == 0) {
++			if (DecimalBPP < MinBPP)
++				return BPP_INVALID;
++			if (DecimalBPP >= MaxBPP)
++				return MaxBPP;
++			return dml_floor(16 * DecimalBPP, 1) / 16.0;
++		}
++
++		if (DecimalBPP < MinBPP
++				|| DesiredBPP < MinBPP
++				|| DesiredBPP > MaxBPP
++				|| DecimalBPP < DesiredBPP)
++			return BPP_INVALID;
++		return DesiredBPP;
++	}
++
++	/* Uncompressed output: choose the candidate list for this format. */
++	if (Format == dm_420) {
++		Steps = Steps420;
++		NumSteps = 3;
++	} else if (Output == dm_hdmi) {
++		if (Format == dm_444) {
++			Steps = Steps444;
++			NumSteps = 4;
++		} else {
++			Steps = Steps422;
++			NumSteps = 3;
++			Budget = DecimalBPP / 1.5;
++		}
++	} else if (Format == dm_s422 || Format == dm_n422) {
++		Steps = Steps422;
++		NumSteps = 3;
++	} else {
++		/* Non-HDMI 4:4:4 stops at 24; the trailing 18 is HDMI only. */
++		Steps = Steps444;
++		NumSteps = 3;
++	}
++
++	/* Lists are sorted high to low, so the first hit is the best one. */
++	for (i = 0; i < NumSteps; i++) {
++		if (Budget >= Steps[i] && (DesiredBPP == 0 || DesiredBPP == Steps[i]))
++			return Steps[i];
++	}
++
++	return BPP_INVALID;
++}
++
++void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
++{
++ struct vba_vars_st *locals = &mode_lib->vba;
++
++ int i;
++ unsigned int j, k, m;
++
++ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
++
++ /*Scale Ratio, taps Support Check*/
++
++ mode_lib->vba.ScaleRatioAndTapsSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.ScalerEnabled[k] == false
++ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)
++ || mode_lib->vba.HRatio[k] != 1.0
++ || mode_lib->vba.htaps[k] != 1.0
++ || mode_lib->vba.VRatio[k] != 1.0
++ || mode_lib->vba.vtaps[k] != 1.0)) {
++ mode_lib->vba.ScaleRatioAndTapsSupport = false;
++ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0
++ || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0
++ || (mode_lib->vba.htaps[k] > 1.0
++ && (mode_lib->vba.htaps[k] % 2) == 1)
++ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
++ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
++ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
++ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
++ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
++ && (mode_lib->vba.HRatio[k] / 2.0
++ > mode_lib->vba.HTAPsChroma[k]
++ || mode_lib->vba.VRatio[k] / 2.0
++ > mode_lib->vba.VTAPsChroma[k]))) {
++ mode_lib->vba.ScaleRatioAndTapsSupport = false;
++ }
++ }
++ /*Source Format, Pixel Format and Scan Support Check*/
++
++ mode_lib->vba.SourceFormatPixelAndScanSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
++ && mode_lib->vba.SourceScan[k] != dm_horz)
++ || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x)
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_64)
++ || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x
++ && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_8
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_10))
++ || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_gfx7_2d_thin_lvp)
++ && !((mode_lib->vba.SourcePixelFormat[k]
++ == dm_444_64
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_444_32)
++ && mode_lib->vba.SourceScan[k]
++ == dm_horz
++ && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
++ == true
++ && mode_lib->vba.DCCEnable[k]
++ == false))
++ || (mode_lib->vba.DCCEnable[k] == true
++ && (mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_linear
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_8
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_10)))) {
++ mode_lib->vba.SourceFormatPixelAndScanSupport = false;
++ }
++ }
++ /*Bandwidth Support Check*/
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
++ locals->BytePerPixelInDETY[k] = 8.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
++ locals->BytePerPixelInDETY[k] = 4.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
++ locals->BytePerPixelInDETY[k] = 2.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
++ locals->BytePerPixelInDETY[k] = 1.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
++ locals->BytePerPixelInDETY[k] = 1.0;
++ locals->BytePerPixelInDETC[k] = 2.0;
++ } else {
++ locals->BytePerPixelInDETY[k] = 4.0 / 3;
++ locals->BytePerPixelInDETC[k] = 8.0 / 3;
++ }
++ if (mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
++ } else {
++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
++ locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0;
++ locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k];
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true
++ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]) * 4.0;
++ } else if (mode_lib->vba.WritebackEnable[k] == true
++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]) * 3.0;
++ } else if (mode_lib->vba.WritebackEnable[k] == true) {
++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]) * 1.5;
++ } else {
++ locals->WriteBandwidth[k] = 0.0;
++ }
++ }
++ mode_lib->vba.DCCEnabledInAnyPlane = false;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.DCCEnable[k] == true) {
++ mode_lib->vba.DCCEnabledInAnyPlane = true;
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->IdealSDPPortBandwidthPerState[i] = dml_min3(
++ mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
++ mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
++ * mode_lib->vba.DRAMChannelWidth,
++ mode_lib->vba.FabricClockPerState[i]
++ * mode_lib->vba.FabricDatapathToDCNDataReturn);
++ if (mode_lib->vba.HostVMEnable == false) {
++ locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i]
++ * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
++ } else {
++ locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i]
++ * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
++ }
++ }
++ /*Writeback Latency support check*/
++
++ mode_lib->vba.WritebackLatencySupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
++ if (locals->WriteBandwidth[k]
++ > (mode_lib->vba.WritebackInterfaceLumaBufferSize
++ + mode_lib->vba.WritebackInterfaceChromaBufferSize)
++ / mode_lib->vba.WritebackLatency) {
++ mode_lib->vba.WritebackLatencySupport = false;
++ }
++ } else {
++ if (locals->WriteBandwidth[k]
++ > 1.5
++ * dml_min(
++ mode_lib->vba.WritebackInterfaceLumaBufferSize,
++ 2.0
++ * mode_lib->vba.WritebackInterfaceChromaBufferSize)
++ / mode_lib->vba.WritebackLatency) {
++ mode_lib->vba.WritebackLatencySupport = false;
++ }
++ }
++ }
++ }
++ /*Re-ordering Buffer Support Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
++ (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
++ + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
++ * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
++ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
++ > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
++ locals->ROBSupport[i] = true;
++ } else {
++ locals->ROBSupport[i] = false;
++ }
++ }
++ /*Writeback Mode Support Check*/
++
++ mode_lib->vba.TotalNumberOfActiveWriteback = 0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0)
++ mode_lib->vba.ActiveWritebacksPerPlane[k] = 1;
++ mode_lib->vba.TotalNumberOfActiveWriteback =
++ mode_lib->vba.TotalNumberOfActiveWriteback
++ + mode_lib->vba.ActiveWritebacksPerPlane[k];
++ }
++ }
++ mode_lib->vba.WritebackModeSupport = true;
++ if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) {
++ mode_lib->vba.WritebackModeSupport = false;
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true
++ && mode_lib->vba.Writeback10bpc420Supported != true
++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
++ mode_lib->vba.WritebackModeSupport = false;
++ }
++ }
++ /*Writeback Scale Ratio and Taps Support Check*/
++
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false
++ && (mode_lib->vba.WritebackHRatio[k] != 1.0
++ || mode_lib->vba.WritebackVRatio[k] != 1.0)) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
++ || mode_lib->vba.WritebackVRatio[k]
++ > mode_lib->vba.WritebackMaxVSCLRatio
++ || mode_lib->vba.WritebackHRatio[k]
++ < mode_lib->vba.WritebackMinHSCLRatio
++ || mode_lib->vba.WritebackVRatio[k]
++ < mode_lib->vba.WritebackMinVSCLRatio
++ || mode_lib->vba.WritebackLumaHTaps[k]
++ > mode_lib->vba.WritebackMaxHSCLTaps
++ || mode_lib->vba.WritebackLumaVTaps[k]
++ > mode_lib->vba.WritebackMaxVSCLTaps
++ || mode_lib->vba.WritebackHRatio[k]
++ > mode_lib->vba.WritebackLumaHTaps[k]
++ || mode_lib->vba.WritebackVRatio[k]
++ > mode_lib->vba.WritebackLumaVTaps[k]
++ || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0
++ && ((mode_lib->vba.WritebackLumaHTaps[k] % 2)
++ == 1))
++ || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32
++ && (mode_lib->vba.WritebackChromaHTaps[k]
++ > mode_lib->vba.WritebackMaxHSCLTaps
++ || mode_lib->vba.WritebackChromaVTaps[k]
++ > mode_lib->vba.WritebackMaxVSCLTaps
++ || 2.0
++ * mode_lib->vba.WritebackHRatio[k]
++ > mode_lib->vba.WritebackChromaHTaps[k]
++ || 2.0
++ * mode_lib->vba.WritebackVRatio[k]
++ > mode_lib->vba.WritebackChromaVTaps[k]
++ || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0
++ && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ if (mode_lib->vba.WritebackVRatio[k] < 1.0) {
++ mode_lib->vba.WritebackLumaVExtra =
++ dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0);
++ } else {
++ mode_lib->vba.WritebackLumaVExtra = -1;
++ }
++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32
++ && mode_lib->vba.WritebackLumaVTaps[k]
++ > (mode_lib->vba.WritebackLineBufferLumaBufferSize
++ + mode_lib->vba.WritebackLineBufferChromaBufferSize)
++ / 3.0
++ / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackLumaVExtra)
++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
++ && mode_lib->vba.WritebackLumaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferLumaBufferSize
++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackLumaVExtra)
++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
++ && mode_lib->vba.WritebackLumaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferLumaBufferSize
++ * 8.0 / 10.0
++ / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackLumaVExtra)) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) {
++ mode_lib->vba.WritebackChromaVExtra = 0.0;
++ } else {
++ mode_lib->vba.WritebackChromaVExtra = -1;
++ }
++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
++ && mode_lib->vba.WritebackChromaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferChromaBufferSize
++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackChromaVExtra)
++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
++ && mode_lib->vba.WritebackChromaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferChromaBufferSize
++ * 8.0 / 10.0
++ / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackChromaVExtra)) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ }
++ }
++ /*Maximum DISPCLK/DPPCLK Support check*/
++
++ mode_lib->vba.WritebackRequiredDISPCLK = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ mode_lib->vba.WritebackRequiredDISPCLK =
++ dml_max(
++ mode_lib->vba.WritebackRequiredDISPCLK,
++ CalculateWriteBackDISPCLK(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k],
++ mode_lib->vba.HTotal[k],
++ mode_lib->vba.WritebackChromaLineBufferWidth));
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.HRatio[k] > 1.0) {
++ locals->PSCL_FACTOR[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / dml_ceil(
++ mode_lib->vba.htaps[k]
++ / 6.0,
++ 1.0));
++ } else {
++ locals->PSCL_FACTOR[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++ if (locals->BytePerPixelInDETC[k] == 0.0) {
++ locals->PSCL_FACTOR_CHROMA[k] = 0.0;
++ locals->MinDPPCLKUsingSingleDPP[k] =
++ mode_lib->vba.PixelClock[k]
++ * dml_max3(
++ mode_lib->vba.vtaps[k] / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]),
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / locals->PSCL_FACTOR[k],
++ 1.0);
++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0)
++ && locals->MinDPPCLKUsingSingleDPP[k]
++ < 2.0 * mode_lib->vba.PixelClock[k]) {
++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0
++ * mode_lib->vba.PixelClock[k];
++ }
++ } else {
++ if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) {
++ locals->PSCL_FACTOR_CHROMA[k] =
++ dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / 2.0
++ / dml_ceil(
++ mode_lib->vba.HTAPsChroma[k]
++ / 6.0,
++ 1.0));
++ } else {
++ locals->PSCL_FACTOR_CHROMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++ locals->MinDPPCLKUsingSingleDPP[k] =
++ mode_lib->vba.PixelClock[k]
++ * dml_max5(
++ mode_lib->vba.vtaps[k] / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]),
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / locals->PSCL_FACTOR[k],
++ mode_lib->vba.VTAPsChroma[k]
++ / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]
++ / 2.0),
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / 4.0
++ / locals->PSCL_FACTOR_CHROMA[k],
++ 1.0);
++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0
++ || mode_lib->vba.HTAPsChroma[k] > 6.0
++ || mode_lib->vba.VTAPsChroma[k] > 6.0)
++ && locals->MinDPPCLKUsingSingleDPP[k]
++ < 2.0 * mode_lib->vba.PixelClock[k]) {
++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0
++ * mode_lib->vba.PixelClock[k];
++ }
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ Calculate256BBlockSizes(
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
++ dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
++ &locals->Read256BlockHeightY[k],
++ &locals->Read256BlockHeightC[k],
++ &locals->Read256BlockWidthY[k],
++ &locals->Read256BlockWidthC[k]);
++ if (mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k];
++ locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k];
++ } else {
++ locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k];
++ locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k];
++ }
++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
++ || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ && (mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_4kb_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_4kb_s_x
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s_t
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s_x
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_var_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_var_s_x)
++ && mode_lib->vba.SourceScan[k] == dm_horz)) {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ } else {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
++ / 2.0;
++ }
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ } else {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
++ && mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
++ / 2.0;
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
++ && mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]
++ / 2.0;
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ } else {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ }
++ }
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ mode_lib->vba.MaximumSwathWidthSupport = 8192.0;
++ } else {
++ mode_lib->vba.MaximumSwathWidthSupport = 5120.0;
++ }
++ mode_lib->vba.MaximumSwathWidthInDETBuffer =
++ dml_min(
++ mode_lib->vba.MaximumSwathWidthSupport,
++ mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
++ / (locals->BytePerPixelInDETY[k]
++ * locals->MinSwathHeightY[k]
++ + locals->BytePerPixelInDETC[k]
++ / 2.0
++ * locals->MinSwathHeightC[k]));
++ if (locals->BytePerPixelInDETC[k] == 0.0) {
++ mode_lib->vba.MaximumSwathWidthInLineBuffer =
++ mode_lib->vba.LineBufferSize
++ * dml_max(mode_lib->vba.HRatio[k], 1.0)
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.vtaps[k]
++ + dml_max(
++ dml_ceil(
++ mode_lib->vba.VRatio[k],
++ 1.0)
++ - 2,
++ 0.0));
++ } else {
++ mode_lib->vba.MaximumSwathWidthInLineBuffer =
++ dml_min(
++ mode_lib->vba.LineBufferSize
++ * dml_max(
++ mode_lib->vba.HRatio[k],
++ 1.0)
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.vtaps[k]
++ + dml_max(
++ dml_ceil(
++ mode_lib->vba.VRatio[k],
++ 1.0)
++ - 2,
++ 0.0)),
++ 2.0 * mode_lib->vba.LineBufferSize
++ * dml_max(
++ mode_lib->vba.HRatio[k]
++ / 2.0,
++ 1.0)
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.VTAPsChroma[k]
++ + dml_max(
++ dml_ceil(
++ mode_lib->vba.VRatio[k]
++ / 2.0,
++ 1.0)
++ - 2,
++ 0.0)));
++ }
++ locals->MaximumSwathWidth[k] = dml_min(
++ mode_lib->vba.MaximumSwathWidthInDETBuffer,
++ mode_lib->vba.MaximumSwathWidthInLineBuffer);
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
++ mode_lib->vba.MaxDispclk[i],
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
++ mode_lib->vba.MaxDppclk[i],
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ locals->RequiredDISPCLK[i][j] = 0.0;
++ locals->DISPCLK_DPPCLK_Support[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine =
++ mode_lib->vba.PixelClock[k]
++ * (1.0
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100.0)
++ * (1.0
++ + mode_lib->vba.DISPCLKRampingMargin
++ / 100.0);
++ if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i]
++ && i == mode_lib->vba.soc.num_states)
++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k]
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++
++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
++ if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i]
++ && i == mode_lib->vba.soc.num_states)
++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
++ locals->ODMCombineEnablePerState[i][k] = false;
++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
++ } else {
++ locals->ODMCombineEnablePerState[i][k] = true;
++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
++ }
++ if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
++ && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
++ && locals->ODMCombineEnablePerState[i][k] == false) {
++ locals->NoOfDPP[i][j][k] = 1;
++ locals->RequiredDPPCLK[i][j][k] =
++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ } else {
++ locals->NoOfDPP[i][j][k] = 2;
++ locals->RequiredDPPCLK[i][j][k] =
++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
++ }
++ locals->RequiredDISPCLK[i][j] = dml_max(
++ locals->RequiredDISPCLK[i][j],
++ mode_lib->vba.PlaneRequiredDISPCLK);
++ if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity)
++ || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) {
++ locals->DISPCLK_DPPCLK_Support[i][j] = false;
++ }
++ }
++ locals->TotalNumberOfActiveDPP[i][j] = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
++ if (j == 1) {
++ while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP
++ && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) {
++ double BWOfNonSplitPlaneOfMaximumBandwidth;
++ unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
++
++ BWOfNonSplitPlaneOfMaximumBandwidth = 0;
++ NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) {
++ BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k];
++ NumberOfNonSplitPlaneOfMaximumBandwidth = k;
++ }
++ }
++ locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
++ locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
++ locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1;
++ }
++ }
++ if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) {
++ locals->RequiredDISPCLK[i][j] = 0.0;
++ locals->DISPCLK_DPPCLK_Support[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->ODMCombineEnablePerState[i][k] = false;
++ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
++ locals->NoOfDPP[i][j][k] = 1;
++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ } else {
++ locals->NoOfDPP[i][j][k] = 2;
++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
++ }
++ if (i != mode_lib->vba.soc.num_states) {
++ mode_lib->vba.PlaneRequiredDISPCLK =
++ mode_lib->vba.PixelClock[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
++ } else {
++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ }
++ locals->RequiredDISPCLK[i][j] = dml_max(
++ locals->RequiredDISPCLK[i][j],
++ mode_lib->vba.PlaneRequiredDISPCLK);
++ if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
++ || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)
++ locals->DISPCLK_DPPCLK_Support[i][j] = false;
++ }
++ locals->TotalNumberOfActiveDPP[i][j] = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
++ }
++ locals->RequiredDISPCLK[i][j] = dml_max(
++ locals->RequiredDISPCLK[i][j],
++ mode_lib->vba.WritebackRequiredDISPCLK);
++ if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity
++ < mode_lib->vba.WritebackRequiredDISPCLK) {
++ locals->DISPCLK_DPPCLK_Support[i][j] = false;
++ }
++ }
++ }
++ /*Viewport Size Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->ViewportSizeSupport[i] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->ODMCombineEnablePerState[i][k] == true) {
++ if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
++ > locals->MaximumSwathWidth[k]) {
++ locals->ViewportSizeSupport[i] = false;
++ }
++ } else {
++ if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
++ locals->ViewportSizeSupport[i] = false;
++ }
++ }
++ }
++ }
++ /*Total Available Pipes Support Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP)
++ locals->TotalAvailablePipesSupport[i][j] = true;
++ else
++ locals->TotalAvailablePipesSupport[i][j] = false;
++ }
++ }
++ /*Total Available OTG Support Check*/
++
++ mode_lib->vba.TotalNumberOfActiveOTG = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG
++ + 1.0;
++ }
++ }
++ if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) {
++ mode_lib->vba.NumberOfOTGSupport = true;
++ } else {
++ mode_lib->vba.NumberOfOTGSupport = false;
++ }
++ /*Display IO and DSC Support Check*/
++
++ mode_lib->vba.NonsupportedDSCInputBPC = false;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
++ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
++ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) {
++ mode_lib->vba.NonsupportedDSCInputBPC = true;
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->RequiresDSC[i][k] = 0;
++ locals->RequiresFEC[i][k] = 0;
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if (mode_lib->vba.Output[k] == dm_hdmi) {
++ locals->RequiresDSC[i][k] = 0;
++ locals->RequiresFEC[i][k] = 0;
++ locals->OutputBppPerState[i][k] = TruncToValidBPP(
++ dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ } else if (mode_lib->vba.Output[k] == dm_dp
++ || mode_lib->vba.Output[k] == dm_edp) {
++ if (mode_lib->vba.Output[k] == dm_edp) {
++ mode_lib->vba.EffectiveFECOverhead = 0.0;
++ } else {
++ mode_lib->vba.EffectiveFECOverhead =
++ mode_lib->vba.FECOverhead;
++ }
++ if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) {
++ mode_lib->vba.Outbpp = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ mode_lib->vba.OutbppDSC = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ true,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ if (mode_lib->vba.DSCEnabled[k] == true) {
++ locals->RequiresDSC[i][k] = true;
++ if (mode_lib->vba.Output[k] == dm_dp) {
++ locals->RequiresFEC[i][k] = true;
++ } else {
++ locals->RequiresFEC[i][k] = false;
++ }
++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
++ } else {
++ locals->RequiresDSC[i][k] = false;
++ locals->RequiresFEC[i][k] = false;
++ }
++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
++ }
++ if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) {
++ mode_lib->vba.Outbpp = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ mode_lib->vba.OutbppDSC = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ true,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ if (mode_lib->vba.DSCEnabled[k] == true) {
++ locals->RequiresDSC[i][k] = true;
++ if (mode_lib->vba.Output[k] == dm_dp) {
++ locals->RequiresFEC[i][k] = true;
++ } else {
++ locals->RequiresFEC[i][k] = false;
++ }
++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
++ } else {
++ locals->RequiresDSC[i][k] = false;
++ locals->RequiresFEC[i][k] = false;
++ }
++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
++ }
++ if (mode_lib->vba.Outbpp == BPP_INVALID
++ && mode_lib->vba.PHYCLKPerState[i]
++ >= 810.0) {
++ mode_lib->vba.Outbpp = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ mode_lib->vba.OutbppDSC = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ mode_lib->vba.ForcedOutputLinkBPP[k],
++ true,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) {
++ locals->RequiresDSC[i][k] = true;
++ if (mode_lib->vba.Output[k] == dm_dp) {
++ locals->RequiresFEC[i][k] = true;
++ } else {
++ locals->RequiresFEC[i][k] = false;
++ }
++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
++ } else {
++ locals->RequiresDSC[i][k] = false;
++ locals->RequiresFEC[i][k] = false;
++ }
++ locals->OutputBppPerState[i][k] =
++ mode_lib->vba.Outbpp;
++ }
++ }
++ } else {
++ locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE;
++ }
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->DIOSupport[i] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->OutputBppPerState[i][k] == BPP_INVALID
++ || (mode_lib->vba.OutputFormat[k] == dm_420
++ && mode_lib->vba.Interlace[k] == true
++ && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) {
++ locals->DIOSupport[i] = false;
++ }
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->DSCCLKRequiredMoreThanSupported[i] = false;
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if ((mode_lib->vba.Output[k] == dm_dp
++ || mode_lib->vba.Output[k] == dm_edp)) {
++ if (mode_lib->vba.OutputFormat[k] == dm_420
++ || mode_lib->vba.OutputFormat[k]
++ == dm_n422) {
++ mode_lib->vba.DSCFormatFactor = 2;
++ } else {
++ mode_lib->vba.DSCFormatFactor = 1;
++ }
++ if (locals->RequiresDSC[i][k] == true) {
++ if (locals->ODMCombineEnablePerState[i][k]
++ == true) {
++ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
++ locals->DSCCLKRequiredMoreThanSupported[i] =
++ true;
++ }
++ } else {
++ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor
++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
++ locals->DSCCLKRequiredMoreThanSupported[i] =
++ true;
++ }
++ }
++ }
++ }
++ }
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->NotEnoughDSCUnits[i] = false;
++ mode_lib->vba.TotalDSCUnitsRequired = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->RequiresDSC[i][k] == true) {
++ if (locals->ODMCombineEnablePerState[i][k] == true) {
++ mode_lib->vba.TotalDSCUnitsRequired =
++ mode_lib->vba.TotalDSCUnitsRequired + 2.0;
++ } else {
++ mode_lib->vba.TotalDSCUnitsRequired =
++ mode_lib->vba.TotalDSCUnitsRequired + 1.0;
++ }
++ }
++ }
++ if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) {
++ locals->NotEnoughDSCUnits[i] = true;
++ }
++ }
++ /*DSC Delay per state*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] != k) {
++ mode_lib->vba.slices = 0;
++ } else if (locals->RequiresDSC[i][k] == 0
++ || locals->RequiresDSC[i][k] == false) {
++ mode_lib->vba.slices = 0;
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) {
++ mode_lib->vba.slices = dml_ceil(
++ mode_lib->vba.PixelClockBackEnd[k] / 400.0,
++ 4.0);
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) {
++ mode_lib->vba.slices = 8.0;
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) {
++ mode_lib->vba.slices = 4.0;
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) {
++ mode_lib->vba.slices = 2.0;
++ } else {
++ mode_lib->vba.slices = 1.0;
++ }
++ if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE
++ || locals->OutputBppPerState[i][k] == BPP_INVALID) {
++ mode_lib->vba.bpp = 0.0;
++ } else {
++ mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
++ }
++ if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
++ if (locals->ODMCombineEnablePerState[i][k] == false) {
++ locals->DSCDelayPerState[i][k] =
++ dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ mode_lib->vba.bpp,
++ dml_ceil(
++ mode_lib->vba.HActive[k]
++ / mode_lib->vba.slices,
++ 1.0),
++ mode_lib->vba.slices,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(
++ mode_lib->vba.OutputFormat[k]);
++ } else {
++ locals->DSCDelayPerState[i][k] =
++ 2.0 * (dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ mode_lib->vba.bpp,
++ dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0),
++ mode_lib->vba.slices / 2,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(mode_lib->vba.OutputFormat[k]));
++ }
++ locals->DSCDelayPerState[i][k] =
++ locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k];
++ } else {
++ locals->DSCDelayPerState[i][k] = 0.0;
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
++ for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true)
++ locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m];
++ }
++ }
++ }
++ }
++
++ //Prefetch Check
++ for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
++ for (j = 0; j <= 1; ++j) {
++ locals->TotalNumberOfDCCActiveDPP[i][j] = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.DCCEnable[k] == true)
++ locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
++ }
++ }
++ }
++
++ mode_lib->vba.UrgentLatency = dml_max3(
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
++ mode_lib->vba.UrgentLatencyVMDataOnly);
++ mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(
++ mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
++ &mode_lib->vba.MinPrefetchMode,
++ &mode_lib->vba.MaxPrefetchMode);
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
++ locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k];
++ if (locals->ODMCombineEnablePerState[i][k] == true) {
++ locals->SwathWidthYThisState[k] =
++ dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
++ } else {
++ locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
++ }
++ mode_lib->vba.SwathWidthGranularityY = 256.0
++ / dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
++ / locals->MaxSwathHeightY[k];
++ mode_lib->vba.RoundedUpMaxSwathSizeBytesY =
++ (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY)
++ + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k];
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
++ mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil(
++ mode_lib->vba.RoundedUpMaxSwathSizeBytesY,
++ 256.0) + 256;
++ }
++ if (locals->MaxSwathHeightC[k] > 0.0) {
++ mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k];
++ mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC)
++ + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k];
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
++ mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256;
++ }
++ } else {
++ mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0;
++ }
++ if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC
++ <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
++ locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k];
++ locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k];
++ } else {
++ locals->SwathHeightYThisState[k] =
++ locals->MinSwathHeightY[k];
++ locals->SwathHeightCThisState[k] =
++ locals->MinSwathHeightC[k];
++ }
++ }
++
++ CalculateDCFCLKDeepSleep(
++ mode_lib,
++ mode_lib->vba.NumberOfActivePlanes,
++ locals->BytePerPixelInDETY,
++ locals->BytePerPixelInDETC,
++ mode_lib->vba.VRatio,
++ locals->SwathWidthYThisState,
++ locals->NoOfDPPThisState,
++ mode_lib->vba.HRatio,
++ mode_lib->vba.PixelClock,
++ locals->PSCL_FACTOR,
++ locals->PSCL_FACTOR_CHROMA,
++ locals->RequiredDPPCLKThisState,
++ &mode_lib->vba.ProjectedDCFCLKDeepSleep);
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) {
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ locals->Read256BlockHeightC[k],
++ locals->Read256BlockWidthC[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k] / 2.0,
++ mode_lib->vba.ViewportHeight[k] / 2.0,
++ locals->SwathWidthYThisState[k] / 2.0,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels,
++ mode_lib->vba.VMMPageSize,
++ mode_lib->vba.PTEBufferSizeInRequestsChroma,
++ mode_lib->vba.PitchC[k],
++ 0.0,
++ &locals->MacroTileWidthC[k],
++ &mode_lib->vba.MetaRowBytesC,
++ &mode_lib->vba.DPTEBytesPerRowC,
++ &locals->PTEBufferSizeNotExceededC[i][j][k],
++ locals->dpte_row_width_chroma_ub,
++ &locals->dpte_row_height_chroma[k],
++ &locals->meta_req_width_chroma[k],
++ &locals->meta_req_height_chroma[k],
++ &locals->meta_row_width_chroma[k],
++ &locals->meta_row_height_chroma[k],
++ &locals->vm_group_bytes_chroma,
++ &locals->dpte_group_bytes_chroma,
++ locals->PixelPTEReqWidthC,
++ locals->PixelPTEReqHeightC,
++ locals->PTERequestSizeC,
++ locals->dpde0_bytes_per_frame_ub_c,
++ locals->meta_pte_bytes_per_frame_ub_c);
++ locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k]/2,
++ mode_lib->vba.VTAPsChroma[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ locals->SwathHeightCThisState[k],
++ mode_lib->vba.ViewportYStartC[k],
++ &locals->PrefillC[k],
++ &locals->MaxNumSwC[k]);
++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma;
++ } else {
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
++ mode_lib->vba.MetaRowBytesC = 0.0;
++ mode_lib->vba.DPTEBytesPerRowC = 0.0;
++ locals->PrefetchLinesC[k] = 0.0;
++ locals->PTEBufferSizeNotExceededC[i][j][k] = true;
++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
++ }
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ locals->Read256BlockHeightY[k],
++ locals->Read256BlockWidthY[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k],
++ mode_lib->vba.ViewportHeight[k],
++ locals->SwathWidthYThisState[k],
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels,
++ mode_lib->vba.VMMPageSize,
++ locals->PTEBufferSizeInRequestsForLuma,
++ mode_lib->vba.PitchY[k],
++ mode_lib->vba.DCCMetaPitchY[k],
++ &locals->MacroTileWidthY[k],
++ &mode_lib->vba.MetaRowBytesY,
++ &mode_lib->vba.DPTEBytesPerRowY,
++ &locals->PTEBufferSizeNotExceededY[i][j][k],
++ locals->dpte_row_width_luma_ub,
++ &locals->dpte_row_height[k],
++ &locals->meta_req_width[k],
++ &locals->meta_req_height[k],
++ &locals->meta_row_width[k],
++ &locals->meta_row_height[k],
++ &locals->vm_group_bytes[k],
++ &locals->dpte_group_bytes[k],
++ locals->PixelPTEReqWidthY,
++ locals->PixelPTEReqHeightY,
++ locals->PTERequestSizeY,
++ locals->dpde0_bytes_per_frame_ub_l,
++ locals->meta_pte_bytes_per_frame_ub_l);
++ locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.vtaps[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ locals->SwathHeightYThisState[k],
++ mode_lib->vba.ViewportYStartY[k],
++ &locals->PrefillY[k],
++ &locals->MaxNumSwY[k]);
++ locals->PDEAndMetaPTEBytesPerFrame[k] =
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
++ locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
++ locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
++
++ CalculateActiveRowBandwidth(
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.HTotal[k] /
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.MetaRowBytesY,
++ mode_lib->vba.MetaRowBytesC,
++ locals->meta_row_height[k],
++ locals->meta_row_height_chroma[k],
++ mode_lib->vba.DPTEBytesPerRowY,
++ mode_lib->vba.DPTEBytesPerRowC,
++ locals->dpte_row_height[k],
++ locals->dpte_row_height_chroma[k],
++ &locals->meta_row_bw[k],
++ &locals->dpte_row_bw[k]);
++ }
++ mode_lib->vba.ExtraLatency = CalculateExtraLatency(
++ locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i],
++ locals->TotalNumberOfActiveDPP[i][j],
++ mode_lib->vba.PixelChunkSizeInKByte,
++ locals->TotalNumberOfDCCActiveDPP[i][j],
++ mode_lib->vba.MetaChunkSize,
++ locals->ReturnBWPerState[i],
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.NumberOfActivePlanes,
++ locals->NoOfDPPThisState,
++ locals->dpte_group_bytes,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels);
++
++ mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency
++ + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j];
++ } else {
++ locals->WritebackDelay[i][k] = 0.0;
++ }
++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
++ if (mode_lib->vba.BlendingAndTiming[m] == k
++ && mode_lib->vba.WritebackEnable[m]
++ == true) {
++ locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k],
++ mode_lib->vba.WritebackLatency + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[m],
++ mode_lib->vba.WritebackHRatio[m],
++ mode_lib->vba.WritebackVRatio[m],
++ mode_lib->vba.WritebackLumaHTaps[m],
++ mode_lib->vba.WritebackLumaVTaps[m],
++ mode_lib->vba.WritebackChromaHTaps[m],
++ mode_lib->vba.WritebackChromaVTaps[m],
++ mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]);
++ }
++ }
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == m) {
++ locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m];
++ }
++ }
++ }
++ mode_lib->vba.MaxMaxVStartup = 0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
++ - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
++ mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]);
++ }
++
++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
++ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup;
++ do {
++ mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
++ mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
++
++ mode_lib->vba.TWait = CalculateTWait(
++ mode_lib->vba.PrefetchMode[i][j],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.SREnterPlusExitTime);
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ Pipe myPipe;
++ HostVM myHostVM;
++
++ if (mode_lib->vba.XFCEnabled[k] == true) {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay =
++ CalculateRemoteSurfaceFlipDelay(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ locals->SwathWidthYThisState[k],
++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.XFCTSlvVupdateOffset,
++ mode_lib->vba.XFCTSlvVupdateWidth,
++ mode_lib->vba.XFCTSlvVreadyOffset,
++ mode_lib->vba.XFCXBUFLatencyTolerance,
++ mode_lib->vba.XFCFillBWOverhead,
++ mode_lib->vba.XFCSlvChunkSize,
++ mode_lib->vba.XFCBusTransportTime,
++ mode_lib->vba.TimeCalc,
++ mode_lib->vba.TWait,
++ &mode_lib->vba.SrcActiveDrainRate,
++ &mode_lib->vba.TInitXFill,
++ &mode_lib->vba.TslvChk);
++ } else {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
++ }
++
++ myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
++ myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
++ myPipe.PixelClock = mode_lib->vba.PixelClock[k];
++ myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep;
++ myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
++ myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
++ myPipe.SourceScan = mode_lib->vba.SourceScan[k];
++ myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
++ myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
++ myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
++ myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
++ myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
++ myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
++ myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
++ myPipe.HTotal = mode_lib->vba.HTotal[k];
++
++
++ myHostVM.Enable = mode_lib->vba.HostVMEnable;
++ myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
++ myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
++
++
++ mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
++ mode_lib,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ &myPipe,
++ locals->DSCDelayPerState[i][k],
++ mode_lib->vba.DPPCLKDelaySubtotal,
++ mode_lib->vba.DPPCLKDelaySCL,
++ mode_lib->vba.DPPCLKDelaySCLLBOnly,
++ mode_lib->vba.DPPCLKDelayCNVCFormater,
++ mode_lib->vba.DPPCLKDelayCNVCCursor,
++ mode_lib->vba.DISPCLKDelaySubtotal,
++ locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.MaxInterDCNTileRepeaters,
++ dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]),
++ locals->MaximumVStartup[k],
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ &myHostVM,
++ mode_lib->vba.DynamicMetadataEnable[k],
++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
++ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.ExtraLatency,
++ mode_lib->vba.TimeCalc,
++ locals->PDEAndMetaPTEBytesPerFrame[k],
++ locals->MetaRowBytes[k],
++ locals->DPTEBytesPerRow[k],
++ locals->PrefetchLinesY[k],
++ locals->SwathWidthYThisState[k],
++ locals->BytePerPixelInDETY[k],
++ locals->PrefillY[k],
++ locals->MaxNumSwY[k],
++ locals->PrefetchLinesC[k],
++ locals->BytePerPixelInDETC[k],
++ locals->PrefillC[k],
++ locals->MaxNumSwC[k],
++ locals->SwathHeightYThisState[k],
++ locals->SwathHeightCThisState[k],
++ mode_lib->vba.TWait,
++ mode_lib->vba.XFCEnabled[k],
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay,
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ &locals->dst_x_after_scaler,
++ &locals->dst_y_after_scaler,
++ &locals->LineTimesForPrefetch[k],
++ &locals->PrefetchBW[k],
++ &locals->LinesForMetaPTE[k],
++ &locals->LinesForMetaAndDPTERow[k],
++ &locals->VRatioPreY[i][j][k],
++ &locals->VRatioPreC[i][j][k],
++ &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
++ &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
++ &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
++ &locals->Tno_bw[k],
++ &locals->prefetch_vmrow_bw[k],
++ locals->swath_width_luma_ub,
++ locals->swath_width_chroma_ub,
++ &mode_lib->vba.VUpdateOffsetPix[k],
++ &mode_lib->vba.VUpdateWidthPix[k],
++ &mode_lib->vba.VReadyOffsetPix[k]);
++ }
++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ uint m;
++
++ locals->cursor_bw[k] = 0;
++ locals->cursor_bw_pre[k] = 0;
++ for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
++ locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
++ / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
++ locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
++ / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k];
++ }
++
++ CalculateUrgentBurstFactor(
++ mode_lib->vba.DETBufferSizeInKByte,
++ locals->SwathHeightYThisState[k],
++ locals->SwathHeightCThisState[k],
++ locals->SwathWidthYThisState[k],
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.CursorBufferSize,
++ mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
++ dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
++ mode_lib->vba.VRatio[k],
++ locals->VRatioPreY[i][j][k],
++ locals->VRatioPreC[i][j][k],
++ locals->BytePerPixelInDETY[k],
++ locals->BytePerPixelInDETC[k],
++ &locals->UrgentBurstFactorCursor[k],
++ &locals->UrgentBurstFactorCursorPre[k],
++ &locals->UrgentBurstFactorLuma[k],
++ &locals->UrgentBurstFactorLumaPre[k],
++ &locals->UrgentBurstFactorChroma[k],
++ &locals->UrgentBurstFactorChromaPre[k],
++ &locals->NotEnoughUrgentLatencyHiding,
++ &locals->NotEnoughUrgentLatencyHidingPre);
++
++ if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
++ locals->UrgentBurstFactorCursor[k] = 1;
++ locals->UrgentBurstFactorCursorPre[k] = 1;
++ locals->UrgentBurstFactorLuma[k] = 1;
++ locals->UrgentBurstFactorLumaPre[k] = 1;
++ locals->UrgentBurstFactorChroma[k] = 1;
++ locals->UrgentBurstFactorChromaPre[k] = 1;
++ }
++
++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch
++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k]
++ * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
++ * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k];
++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch
++ + dml_max3(locals->prefetch_vmrow_bw[k],
++ locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
++ * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k]
++ + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
++ locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
++ + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
++ + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
++ }
++ locals->BandwidthWithoutPrefetchSupported[i] = true;
++ if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]
++ || locals->NotEnoughUrgentLatencyHiding == 1) {
++ locals->BandwidthWithoutPrefetchSupported[i] = false;
++ }
++
++ locals->PrefetchSupported[i][j] = true;
++ if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]
++ || locals->NotEnoughUrgentLatencyHiding == 1
++ || locals->NotEnoughUrgentLatencyHidingPre == 1) {
++ locals->PrefetchSupported[i][j] = false;
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->LineTimesForPrefetch[k] < 2.0
++ || locals->LinesForMetaPTE[k] >= 32.0
++ || locals->LinesForMetaAndDPTERow[k] >= 16.0
++ || mode_lib->vba.IsErrorResult[i][j][k] == true) {
++ locals->PrefetchSupported[i][j] = false;
++ }
++ }
++ locals->VRatioInPrefetchSupported[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->VRatioPreY[i][j][k] > 4.0
++ || locals->VRatioPreC[i][j][k] > 4.0
++ || mode_lib->vba.IsErrorResult[i][j][k] == true) {
++ locals->VRatioInPrefetchSupported[i][j] = false;
++ }
++ }
++ mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) {
++ mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
++ }
++ }
++
++ if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
++ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup;
++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
++ } else {
++ mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
++ }
++ } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
++ && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup
++ || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
++
++ if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i];
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
++ - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
++ + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
++ locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
++ + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
++ + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
++ }
++ mode_lib->vba.TotImmediateFlipBytes = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
++ + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k];
++ }
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ CalculateFlipSchedule(
++ mode_lib,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++ mode_lib->vba.ExtraLatency,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.HostVMEnable,
++ mode_lib->vba.HostVMMaxPageTableLevels,
++ mode_lib->vba.HostVMCachedPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ locals->PDEAndMetaPTEBytesPerFrame[k],
++ locals->MetaRowBytes[k],
++ locals->DPTEBytesPerRow[k],
++ mode_lib->vba.BandwidthAvailableForImmediateFlip,
++ mode_lib->vba.TotImmediateFlipBytes,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.VRatio[k],
++ locals->Tno_bw[k],
++ mode_lib->vba.DCCEnable[k],
++ locals->dpte_row_height[k],
++ locals->meta_row_height[k],
++ locals->dpte_row_height_chroma[k],
++ locals->meta_row_height_chroma[k],
++ &locals->DestinationLinesToRequestVMInImmediateFlip[k],
++ &locals->DestinationLinesToRequestRowInImmediateFlip[k],
++ &locals->final_flip_bw[k],
++ &locals->ImmediateFlipSupportedForPipe[k]);
++ }
++ mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
++ locals->prefetch_vmrow_bw[k],
++ locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
++ + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
++ locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k]
++ * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k]
++ * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k]
++ * locals->UrgentBurstFactorCursorPre[k]);
++ }
++ locals->ImmediateFlipSupportedForState[i][j] = true;
++ if (mode_lib->vba.total_dcn_read_bw_with_flip
++ > locals->ReturnBWPerState[i]) {
++ locals->ImmediateFlipSupportedForState[i][j] = false;
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->ImmediateFlipSupportedForPipe[k] == false) {
++ locals->ImmediateFlipSupportedForState[i][j] = false;
++ }
++ }
++ } else {
++ locals->ImmediateFlipSupportedForState[i][j] = false;
++ }
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
++ CalculateWatermarksAndDRAMSpeedChangeSupport(
++ mode_lib,
++ mode_lib->vba.PrefetchMode[i][j],
++ mode_lib->vba.NumberOfActivePlanes,
++ mode_lib->vba.MaxLineBufferLines,
++ mode_lib->vba.LineBufferSize,
++ mode_lib->vba.DPPOutputBufferPixels,
++ mode_lib->vba.DETBufferSizeInKByte,
++ mode_lib->vba.WritebackInterfaceLumaBufferSize,
++ mode_lib->vba.WritebackInterfaceChromaBufferSize,
++ mode_lib->vba.DCFCLKPerState[i],
++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
++ locals->ReturnBWPerState[i],
++ mode_lib->vba.GPUVMEnable,
++ locals->dpte_group_bytes,
++ mode_lib->vba.MetaChunkSize,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.ExtraLatency,
++ mode_lib->vba.WritebackLatency,
++ mode_lib->vba.WritebackChunkSize,
++ mode_lib->vba.SOCCLKPerState[i],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.SRExitTime,
++ mode_lib->vba.SREnterPlusExitTime,
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ locals->NoOfDPPThisState,
++ mode_lib->vba.DCCEnable,
++ locals->RequiredDPPCLKThisState,
++ locals->SwathWidthYSingleDPP,
++ locals->SwathHeightYThisState,
++ locals->ReadBandwidthLuma,
++ locals->SwathHeightCThisState,
++ locals->ReadBandwidthChroma,
++ mode_lib->vba.LBBitPerPixel,
++ locals->SwathWidthYThisState,
++ mode_lib->vba.HRatio,
++ mode_lib->vba.vtaps,
++ mode_lib->vba.VTAPsChroma,
++ mode_lib->vba.VRatio,
++ mode_lib->vba.HTotal,
++ mode_lib->vba.PixelClock,
++ mode_lib->vba.BlendingAndTiming,
++ locals->BytePerPixelInDETY,
++ locals->BytePerPixelInDETC,
++ mode_lib->vba.WritebackEnable,
++ mode_lib->vba.WritebackPixelFormat,
++ mode_lib->vba.WritebackDestinationWidth,
++ mode_lib->vba.WritebackDestinationHeight,
++ mode_lib->vba.WritebackSourceHeight,
++ &locals->DRAMClockChangeSupport[i][j],
++ &mode_lib->vba.UrgentWatermark,
++ &mode_lib->vba.WritebackUrgentWatermark,
++ &mode_lib->vba.DRAMClockChangeWatermark,
++ &mode_lib->vba.WritebackDRAMClockChangeWatermark,
++ &mode_lib->vba.StutterExitWatermark,
++ &mode_lib->vba.StutterEnterPlusExitWatermark,
++ &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
++ }
++ }
++
++ /*Vertical Active BW support*/
++ {
++ double MaxTotalVActiveRDBandwidth = 0.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
++ locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(
++ locals->IdealSDPPortBandwidthPerState[i] *
++ mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
++ / 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
++ mode_lib->vba.NumberOfChannels *
++ mode_lib->vba.DRAMChannelWidth *
++ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
++ / 100.0);
++
++ if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) {
++ locals->TotalVerticalActiveBandwidthSupport[i] = true;
++ } else {
++ locals->TotalVerticalActiveBandwidthSupport[i] = false;
++ }
++ }
++ }
++
++ /*PTE Buffer Size Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ locals->PTEBufferSizeNotExceeded[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->PTEBufferSizeNotExceededY[i][j][k] == false
++ || locals->PTEBufferSizeNotExceededC[i][j][k] == false) {
++ locals->PTEBufferSizeNotExceeded[i][j] = false;
++ }
++ }
++ }
++ }
++ /*Cursor Support Check*/
++
++ mode_lib->vba.CursorSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
++ for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
++ if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) {
++ mode_lib->vba.CursorSupport = false;
++ }
++ }
++ }
++ }
++ /*Valid Pitch Check*/
++
++ mode_lib->vba.PitchSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->AlignedYPitch[k] = dml_ceil(
++ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]),
++ locals->MacroTileWidthY[k]);
++ if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) {
++ mode_lib->vba.PitchSupport = false;
++ }
++ if (mode_lib->vba.DCCEnable[k] == true) {
++ locals->AlignedDCCMetaPitch[k] = dml_ceil(
++ dml_max(
++ mode_lib->vba.DCCMetaPitchY[k],
++ mode_lib->vba.ViewportWidth[k]),
++ 64.0 * locals->Read256BlockWidthY[k]);
++ } else {
++ locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k];
++ }
++ if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) {
++ mode_lib->vba.PitchSupport = false;
++ }
++ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
++ locals->AlignedCPitch[k] = dml_ceil(
++ dml_max(
++ mode_lib->vba.PitchC[k],
++ mode_lib->vba.ViewportWidth[k] / 2.0),
++ locals->MacroTileWidthC[k]);
++ } else {
++ locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k];
++ }
++ if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) {
++ mode_lib->vba.PitchSupport = false;
++ }
++ }
++ /*Mode Support, Voltage State and SOC Configuration*/
++
++ for (i = mode_lib->vba.soc.num_states; i >= 0; i--) {
++ for (j = 0; j < 2; j++) {
++ enum dm_validation_status status = DML_VALIDATION_OK;
++
++ if (mode_lib->vba.ScaleRatioAndTapsSupport != true) {
++ status = DML_FAIL_SCALE_RATIO_TAP;
++ } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
++ status = DML_FAIL_SOURCE_PIXEL_FORMAT;
++ } else if (locals->ViewportSizeSupport[i] != true) {
++ status = DML_FAIL_VIEWPORT_SIZE;
++ } else if (locals->DIOSupport[i] != true) {
++ status = DML_FAIL_DIO_SUPPORT;
++ } else if (locals->NotEnoughDSCUnits[i] != false) {
++ status = DML_FAIL_NOT_ENOUGH_DSC;
++ } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) {
++ status = DML_FAIL_DSC_CLK_REQUIRED;
++ } else if (locals->ROBSupport[i] != true) {
++ status = DML_FAIL_REORDERING_BUFFER;
++ } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
++ status = DML_FAIL_DISPCLK_DPPCLK;
++ } else if (locals->TotalAvailablePipesSupport[i][j] != true) {
++ status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
++ } else if (mode_lib->vba.NumberOfOTGSupport != true) {
++ status = DML_FAIL_NUM_OTG;
++ } else if (mode_lib->vba.WritebackModeSupport != true) {
++ status = DML_FAIL_WRITEBACK_MODE;
++ } else if (mode_lib->vba.WritebackLatencySupport != true) {
++ status = DML_FAIL_WRITEBACK_LATENCY;
++ } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) {
++ status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
++ } else if (mode_lib->vba.CursorSupport != true) {
++ status = DML_FAIL_CURSOR_SUPPORT;
++ } else if (mode_lib->vba.PitchSupport != true) {
++ status = DML_FAIL_PITCH_SUPPORT;
++ } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
++ status = DML_FAIL_TOTAL_V_ACTIVE_BW;
++ } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) {
++ status = DML_FAIL_PTE_BUFFER_SIZE;
++ } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) {
++ status = DML_FAIL_DSC_INPUT_BPC;
++ } else if ((mode_lib->vba.HostVMEnable != false
++ && locals->ImmediateFlipSupportedForState[i][j] != true)) {
++ status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
++ } else if (locals->PrefetchSupported[i][j] != true) {
++ status = DML_FAIL_PREFETCH_SUPPORT;
++ } else if (locals->VRatioInPrefetchSupported[i][j] != true) {
++ status = DML_FAIL_V_RATIO_PREFETCH;
++ }
++
++ if (status == DML_VALIDATION_OK) {
++ locals->ModeSupport[i][j] = true;
++ } else {
++ locals->ModeSupport[i][j] = false;
++ }
++ locals->ValidationStatus[i] = status;
++ }
++ }
++ {
++ unsigned int MaximumMPCCombine = 0;
++ mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1;
++ for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) {
++ if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) {
++ mode_lib->vba.VoltageLevel = i;
++ if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false
++ || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible
++ || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks
++ && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive
++ && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive)
++ || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank
++ && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) {
++ MaximumMPCCombine = 1;
++ } else {
++ MaximumMPCCombine = 0;
++ }
++ break;
++ }
++ }
++ mode_lib->vba.ImmediateFlipSupport =
++ locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
++ locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
++ }
++ mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
++ mode_lib->vba.maxMpcComb = MaximumMPCCombine;
++ }
++ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ mode_lib->vba.ODMCombineEnabled[k] =
++ locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
++ } else {
++ mode_lib->vba.ODMCombineEnabled[k] = 0;
++ }
++ mode_lib->vba.DSCEnabled[k] =
++ locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
++ mode_lib->vba.OutputBpp[k] =
++ locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
++ }
++}
++
++ /*
++  * Compute the urgent, DRAM clock change, writeback and stutter watermarks and
++  * classify how a DRAM clock change can be supported by the active planes.
++  *
++  * Values written through the output pointers:
++  *   UrgentWatermark           = UrgentLatency + ExtraLatency
++  *   DRAMClockChangeWatermark  = DRAMClockChangeLatency + *UrgentWatermark
++  *   WritebackUrgentWatermark, WritebackDRAMClockChangeWatermark
++  *       WritebackLatency based; one chunk transfer time
++  *       (WritebackChunkSize * 1024 / 32 / SOCCLK) is added when more than one
++  *       plane has writeback enabled.
++  *   DRAMClockChangeSupport
++  *       vactive     when the smallest per-plane margin is > 0;
++  *       vblank      when PrefetchMode == 0 and either SynchronizedVBlank is
++  *                   set, exactly one OTG is active, or the smallest margin of
++  *                   the planes not tied to the worst plane's timing is > 0;
++  *       unsupported otherwise.
++  *   MinActiveDRAMClockChangeLatencySupported
++  *       = smallest per-plane margin + DRAMClockChangeLatency
++  *   StutterExitWatermark, StutterEnterPlusExitWatermark
++  *
++  * Fields of mode_lib->vba written as a side effect: TotalActiveDPP,
++  * TotalDCCActiveDPP, TotalDataReadBandwidth, TotalActiveWriteback,
++  * LBLatencyHidingSourceLinesY/C, ActiveDRAMClockChangeLatencyMargin[],
++  * MinActiveDRAMClockChangeMargin and TotalNumberOfActiveOTG.
++  * Fields read: SynchronizedVBlank and LastPixelOfLineExtraWatermark.
++  *
++  * DCFCLK, UrgentOutOfOrderReturn, ReturnBW, GPUVMEnable, dpte_group_bytes,
++  * MetaChunkSize, DPPCLK and SwathWidthSingleDPPY are accepted but not read
++  * by this function.
++  *
++  * NOTE(review): FullDETBufferingTimeY[0] is read unconditionally, so the
++  * caller is presumably expected to pass NumberOfActivePlanes >= 1 - confirm.
++  */
++ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
++ 		struct display_mode_lib *mode_lib,
++ 		unsigned int PrefetchMode,
++ 		unsigned int NumberOfActivePlanes,
++ 		unsigned int MaxLineBufferLines,
++ 		unsigned int LineBufferSize,
++ 		unsigned int DPPOutputBufferPixels,
++ 		double DETBufferSizeInKByte,
++ 		unsigned int WritebackInterfaceLumaBufferSize,
++ 		unsigned int WritebackInterfaceChromaBufferSize,
++ 		double DCFCLK,
++ 		double UrgentOutOfOrderReturn,
++ 		double ReturnBW,
++ 		bool GPUVMEnable,
++ 		long dpte_group_bytes[],
++ 		unsigned int MetaChunkSize,
++ 		double UrgentLatency,
++ 		double ExtraLatency,
++ 		double WritebackLatency,
++ 		double WritebackChunkSize,
++ 		double SOCCLK,
++ 		double DRAMClockChangeLatency,
++ 		double SRExitTime,
++ 		double SREnterPlusExitTime,
++ 		double DCFCLKDeepSleep,
++ 		int DPPPerPlane[],
++ 		bool DCCEnable[],
++ 		double DPPCLK[],
++ 		unsigned int SwathWidthSingleDPPY[],
++ 		unsigned int SwathHeightY[],
++ 		double ReadBandwidthPlaneLuma[],
++ 		unsigned int SwathHeightC[],
++ 		double ReadBandwidthPlaneChroma[],
++ 		unsigned int LBBitPerPixel[],
++ 		unsigned int SwathWidthY[],
++ 		double HRatio[],
++ 		unsigned int vtaps[],
++ 		unsigned int VTAPsChroma[],
++ 		double VRatio[],
++ 		unsigned int HTotal[],
++ 		double PixelClock[],
++ 		unsigned int BlendingAndTiming[],
++ 		double BytePerPixelDETY[],
++ 		double BytePerPixelDETC[],
++ 		bool WritebackEnable[],
++ 		enum source_format_class WritebackPixelFormat[],
++ 		double WritebackDestinationWidth[],
++ 		double WritebackDestinationHeight[],
++ 		double WritebackSourceHeight[],
++ 		enum clock_change_support *DRAMClockChangeSupport,
++ 		double *UrgentWatermark,
++ 		double *WritebackUrgentWatermark,
++ 		double *DRAMClockChangeWatermark,
++ 		double *WritebackDRAMClockChangeWatermark,
++ 		double *StutterExitWatermark,
++ 		double *StutterEnterPlusExitWatermark,
++ 		double *MinActiveDRAMClockChangeLatencySupported)
++ {
++ 	double EffectiveLBLatencyHidingY;
++ 	double EffectiveLBLatencyHidingC;
++ 	double DPPOutputBufferLinesY;
++ 	double DPPOutputBufferLinesC;
++ 	double DETBufferSizeY;
++ 	double DETBufferSizeC;
++ 	double LinesInDETY[DC__NUM_DPP__MAX];
++ 	double LinesInDETC;
++ 	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
++ 	unsigned int LinesInDETCRoundedDownToSwath;
++ 	double FullDETBufferingTimeY[DC__NUM_DPP__MAX];
++ 	double FullDETBufferingTimeC;
++ 	double ActiveDRAMClockChangeLatencyMarginY;
++ 	double ActiveDRAMClockChangeLatencyMarginC;
++ 	double WritebackDRAMClockChangeLatencyMargin;
++ 	double PlaneWithMinActiveDRAMClockChangeMargin;
++ 	double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
++ 	double FullDETBufferingTimeYStutterCriticalPlane = 0;
++ 	double TimeToFinishSwathTransferStutterCriticalPlane = 0;
++ 	uint k, j;
++
++ 	/* Total DPPs in use, and how many of them belong to DCC-enabled planes. */
++ 	mode_lib->vba.TotalActiveDPP = 0;
++ 	mode_lib->vba.TotalDCCActiveDPP = 0;
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
++ 		if (DCCEnable[k] == true) {
++ 			mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
++ 		}
++ 	}
++
++ 	/* Sum of luma and chroma read bandwidth over all planes. */
++ 	mode_lib->vba.TotalDataReadBandwidth = 0;
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth
++ 				+ ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
++ 	}
++
++ 	*UrgentWatermark = UrgentLatency + ExtraLatency;
++
++ 	*DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
++
++ 	mode_lib->vba.TotalActiveWriteback = 0;
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		if (WritebackEnable[k] == true) {
++ 			mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
++ 		}
++ 	}
++
++ 	/* With more than one writeback active, add one chunk transfer time. */
++ 	if (mode_lib->vba.TotalActiveWriteback <= 1) {
++ 		*WritebackUrgentWatermark = WritebackLatency;
++ 	} else {
++ 		*WritebackUrgentWatermark = WritebackLatency
++ 				+ WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
++ 	}
++
++ 	if (mode_lib->vba.TotalActiveWriteback <= 1) {
++ 		*WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
++ 	} else {
++ 		*WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency
++ 				+ WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
++ 	}
++
++ 	/*
++ 	 * Per plane: sum the time hidden by the DPP output buffer, the line
++ 	 * buffer and the DET, subtract the DRAM clock change watermark, and keep
++ 	 * the result as that plane's DRAM clock change latency margin.
++ 	 */
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++
++ 		mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines,
++ 				dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1))
++ 				- (vtaps[k] - 1);
++
++ 		mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines,
++ 				dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1))
++ 				- (VTAPsChroma[k] - 1);
++
++ 		EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k]
++ 				* (HTotal[k] / PixelClock[k]);
++
++ 		EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
++ 				/ (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]);
++
++ 		/* Lines held by the DPP output buffer: a fraction, half a line, or one line. */
++ 		if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) {
++ 			DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k];
++ 		} else if (SwathWidthY[k] > DPPOutputBufferPixels) {
++ 			DPPOutputBufferLinesY = 0.5;
++ 		} else {
++ 			DPPOutputBufferLinesY = 1;
++ 		}
++
++ 		if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) {
++ 			DPPOutputBufferLinesC = (double) DPPOutputBufferPixels
++ 					/ (SwathWidthY[k] / 2.0);
++ 		} else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) {
++ 			DPPOutputBufferLinesC = 0.5;
++ 		} else {
++ 			DPPOutputBufferLinesC = 1;
++ 		}
++
++ 		CalculateDETBufferSize(
++ 				DETBufferSizeInKByte,
++ 				SwathHeightY[k],
++ 				SwathHeightC[k],
++ 				&DETBufferSizeY,
++ 				&DETBufferSizeC);
++
++ 		/* Whole swaths that fit in the DET, converted to display time. */
++ 		LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
++ 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
++ 		FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
++ 				* (HTotal[k] / PixelClock[k]) / VRatio[k];
++ 		if (BytePerPixelDETC[k] > 0) {
++ 			LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
++ 			LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
++ 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
++ 					* (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
++ 		} else {
++ 			LinesInDETC = 0;
++ 			FullDETBufferingTimeC = 999999;
++ 		}
++
++ 		ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k]
++ 				* DPPOutputBufferLinesY + EffectiveLBLatencyHidingY
++ 				+ FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark;
++
++ 		if (NumberOfActivePlanes > 1) {
++ 			ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
++ 					- (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
++ 		}
++
++ 		if (BytePerPixelDETC[k] > 0) {
++ 			ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k]
++ 					* DPPOutputBufferLinesC + EffectiveLBLatencyHidingC
++ 					+ FullDETBufferingTimeC - *DRAMClockChangeWatermark;
++ 			if (NumberOfActivePlanes > 1) {
++ 				ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
++ 						- (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2);
++ 			}
++ 			mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
++ 					ActiveDRAMClockChangeLatencyMarginY,
++ 					ActiveDRAMClockChangeLatencyMarginC);
++ 		} else {
++ 			mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
++ 		}
++
++ 		/* A writeback on this plane can only lower the plane's margin. */
++ 		if (WritebackEnable[k] == true) {
++ 			if (WritebackPixelFormat[k] == dm_444_32) {
++ 				WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize
++ 						+ WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k]
++ 						* WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k]
++ 						/ PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark;
++ 			} else {
++ 				WritebackDRAMClockChangeLatencyMargin = dml_min(
++ 						WritebackInterfaceLumaBufferSize * 8.0 / 10,
++ 						2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k]
++ 						* WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]))
++ 						- *WritebackDRAMClockChangeWatermark;
++ 			}
++ 			mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
++ 					mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
++ 					WritebackDRAMClockChangeLatencyMargin);
++ 		}
++ 	}
++
++ 	/*
++ 	 * Smallest margin over all planes; remember the plane that owns the
++ 	 * timing (BlendingAndTiming) of the plane with that smallest margin.
++ 	 */
++ 	mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
++ 	PlaneWithMinActiveDRAMClockChangeMargin = 0;
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
++ 				< mode_lib->vba.MinActiveDRAMClockChangeMargin) {
++ 			mode_lib->vba.MinActiveDRAMClockChangeMargin =
++ 					mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
++ 			if (BlendingAndTiming[k] == k) {
++ 				PlaneWithMinActiveDRAMClockChangeMargin = k;
++ 			} else {
++ 				for (j = 0; j < NumberOfActivePlanes; ++j) {
++ 					if (BlendingAndTiming[k] == j) {
++ 						PlaneWithMinActiveDRAMClockChangeMargin = j;
++ 					}
++ 				}
++ 			}
++ 		}
++ 	}
++
++ 	*MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
++
++ 	/* Smallest margin among planes not tied to the worst plane's timing. */
++ 	SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k))
++ 				&& !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
++ 				&& mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
++ 						< SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
++ 			SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
++ 					mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
++ 		}
++ 	}
++
++ 	/* A plane that is its own BlendingAndTiming entry counts as one OTG. */
++ 	mode_lib->vba.TotalNumberOfActiveOTG = 0;
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		if (BlendingAndTiming[k] == k) {
++ 			mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
++ 		}
++ 	}
++
++ 	if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
++ 		*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
++ 	} else if (((mode_lib->vba.SynchronizedVBlank == true
++ 			|| mode_lib->vba.TotalNumberOfActiveOTG == 1
++ 			|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0)
++ 			&& PrefetchMode == 0)) {
++ 		*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
++ 	} else {
++ 		*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
++ 	}
++
++ 	/*
++ 	 * Stutter-critical plane: the plane with the smallest full DET buffering
++ 	 * time. The running minimum has to be updated on every hit; otherwise
++ 	 * each plane is only compared against plane 0 and the last plane that is
++ 	 * not slower than plane 0 would be picked instead of the true minimum.
++ 	 * "<=" is kept so that plane 0 itself is selected on the first iteration.
++ 	 */
++ 	FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
++ 	for (k = 0; k < NumberOfActivePlanes; ++k) {
++ 		if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
++ 			FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
++ 			TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k]
++ 					- (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]))
++ 					* (HTotal[k] / PixelClock[k]) / VRatio[k];
++ 		}
++ 	}
++
++ 	*StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
++ 			+ ExtraLatency + 10 / DCFCLKDeepSleep;
++ 	/* Never below the time the critical plane needs to finish its swath. */
++ 	*StutterEnterPlusExitWatermark = dml_max(
++ 			SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
++ 					+ ExtraLatency + 10 / DCFCLKDeepSleep,
++ 			TimeToFinishSwathTransferStutterCriticalPlane);
++
++ }
++
++ /*
++  * Derive the deep-sleep DCFCLK.
++  *
++  * For every active plane a required clock is computed from the swath width,
++  * the rounded-up bytes per pixel and the time the display pipe takes to
++  * deliver one line; it is floored at PixelClock / 16 and stored in
++  * mode_lib->vba.DCFCLKDeepSleepPerPlane[]. *DCFCLKDeepSleep receives the
++  * largest per-plane value, and never less than 8.
++  */
++ static void CalculateDCFCLKDeepSleep(
++ 		struct display_mode_lib *mode_lib,
++ 		unsigned int NumberOfActivePlanes,
++ 		double BytePerPixelDETY[],
++ 		double BytePerPixelDETC[],
++ 		double VRatio[],
++ 		unsigned int SwathWidthY[],
++ 		int DPPPerPlane[],
++ 		double HRatio[],
++ 		double PixelClock[],
++ 		double PSCL_THROUGHPUT[],
++ 		double PSCL_THROUGHPUT_CHROMA[],
++ 		double DPPCLK[],
++ 		double *DCFCLKDeepSleep)
++ {
++ 	uint plane;
++
++ 	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
++ 		double LumaLineTime;
++ 		double ChromaLineTime;
++ 		double LumaDemand;
++ 		double PlaneClock;
++
++ 		/* Line delivery time: pixel-rate bound up to 1:1 vertical scaling, scaler bound above. */
++ 		LumaLineTime = (VRatio[plane] <= 1)
++ 				? SwathWidthY[plane] * DPPPerPlane[plane] / HRatio[plane] / PixelClock[plane]
++ 				: SwathWidthY[plane] / PSCL_THROUGHPUT[plane] / DPPCLK[plane];
++
++ 		if (BytePerPixelDETC[plane] == 0) {
++ 			ChromaLineTime = 0;
++ 		} else if (VRatio[plane] / 2 <= 1) {
++ 			ChromaLineTime = SwathWidthY[plane] / 2.0
++ 					* DPPPerPlane[plane] / (HRatio[plane] / 2) / PixelClock[plane];
++ 		} else {
++ 			ChromaLineTime = SwathWidthY[plane] / 2.0
++ 					/ PSCL_THROUGHPUT_CHROMA[plane] / DPPCLK[plane];
++ 		}
++
++ 		/* Common luma term; divided by 32 with chroma present, by 64 without. */
++ 		LumaDemand = 1.1 * SwathWidthY[plane] * dml_ceil(BytePerPixelDETY[plane], 1);
++
++ 		if (BytePerPixelDETC[plane] > 0) {
++ 			PlaneClock = dml_max(
++ 					LumaDemand / 32.0 / LumaLineTime,
++ 					1.1 * SwathWidthY[plane] / 2.0
++ 							* dml_ceil(BytePerPixelDETC[plane], 2) / 32.0
++ 							/ ChromaLineTime);
++ 		} else {
++ 			PlaneClock = LumaDemand / 64.0 / LumaLineTime;
++ 		}
++
++ 		mode_lib->vba.DCFCLKDeepSleepPerPlane[plane] = dml_max(
++ 				PlaneClock,
++ 				PixelClock[plane] / 16);
++ 	}
++
++ 	/* Result is the maximum over all planes, with a floor of 8. */
++ 	*DCFCLKDeepSleep = 8;
++ 	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
++ 		*DCFCLKDeepSleep = dml_max(
++ 				*DCFCLKDeepSleep,
++ 				mode_lib->vba.DCFCLKDeepSleepPerPlane[plane]);
++ 	}
++ }
++
++ /*
++  * Split the DET buffer between the luma and chroma surfaces.
++  *
++  * DETBufferSizeInKByte is converted to bytes (x 1024) and divided as follows:
++  *   - SwathHeightC == 0:            everything to luma, nothing to chroma;
++  *   - SwathHeightY <= SwathHeightC: half each;
++  *   - otherwise:                    two thirds to luma, one third to chroma.
++  * The two shares are returned through DETBufferSizeY and DETBufferSizeC.
++  */
++ static void CalculateDETBufferSize(
++ 		double DETBufferSizeInKByte,
++ 		unsigned int SwathHeightY,
++ 		unsigned int SwathHeightC,
++ 		double *DETBufferSizeY,
++ 		double *DETBufferSizeC)
++ {
++ 	const double DETBufferSizeInByte = DETBufferSizeInKByte * 1024;
++
++ 	if (SwathHeightC == 0) {
++ 		*DETBufferSizeY = DETBufferSizeInByte;
++ 		*DETBufferSizeC = 0;
++ 		return;
++ 	}
++
++ 	if (SwathHeightY > SwathHeightC) {
++ 		*DETBufferSizeY = DETBufferSizeInByte * 2 / 3;
++ 		*DETBufferSizeC = DETBufferSizeInByte / 3;
++ 	} else {
++ 		*DETBufferSizeY = DETBufferSizeInByte / 2;
++ 		*DETBufferSizeC = DETBufferSizeInByte / 2;
++ 	}
++ }
++
++ /*
++  * Compute the urgent burst factors for the cursor, luma and chroma of one
++  * plane, for normal operation and for prefetch ("Pre" outputs).
++  *
++  * For each buffer the time it can hide is derived from the number of lines it
++  * holds, LineTime and the vertical ratio. If that time does not exceed
++  * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding (or
++  * *NotEnoughUrgentLatencyHidingPre) is set to 1; otherwise the factor is
++  * time / (time - UrgentLatency). A prefetch ratio that is not > 0 yields a
++  * prefetch factor of 1.
++  *
++  * Both NotEnough* flags are always reset to 0 on entry. The cursor factors
++  * are only written when CursorWidth > 0, and the chroma factors only when
++  * BytePerPixelInDETC > 0; in the other cases those outputs keep whatever the
++  * caller stored there.
++  */
++ static void CalculateUrgentBurstFactor(
++ 		unsigned int DETBufferSizeInKByte,
++ 		unsigned int SwathHeightY,
++ 		unsigned int SwathHeightC,
++ 		unsigned int SwathWidthY,
++ 		double LineTime,
++ 		double UrgentLatency,
++ 		double CursorBufferSize,
++ 		unsigned int CursorWidth,
++ 		unsigned int CursorBPP,
++ 		double VRatio,
++ 		double VRatioPreY,
++ 		double VRatioPreC,
++ 		double BytePerPixelInDETY,
++ 		double BytePerPixelInDETC,
++ 		double *UrgentBurstFactorCursor,
++ 		double *UrgentBurstFactorCursorPre,
++ 		double *UrgentBurstFactorLuma,
++ 		double *UrgentBurstFactorLumaPre,
++ 		double *UrgentBurstFactorChroma,
++ 		double *UrgentBurstFactorChromaPre,
++ 		unsigned int *NotEnoughUrgentLatencyHiding,
++ 		unsigned int *NotEnoughUrgentLatencyHidingPre)
++ {
++ 	double LinesInDETLuma;
++ 	double LinesInDETChroma;
++ 	unsigned int LinesInCursorBuffer;
++ 	double CursorBufferSizeInTime;
++ 	double CursorBufferSizeInTimePre;
++ 	double DETBufferSizeInTimeLuma;
++ 	double DETBufferSizeInTimeLumaPre;
++ 	double DETBufferSizeInTimeChroma;
++ 	double DETBufferSizeInTimeChromaPre;
++ 	double DETBufferSizeY;
++ 	double DETBufferSizeC;
++
++ 	*NotEnoughUrgentLatencyHiding = 0;
++ 	*NotEnoughUrgentLatencyHidingPre = 0;
++
++ 	if (CursorWidth > 0) {
++ 		/*
++ 		 * Cursor lines held by the cursor buffer, taken as a power of two:
++ 		 * 1 << floor(dml_log2(buffer bytes / bytes per cursor line)).
++ 		 */
++ 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(
++ 				dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
++ 		CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
++ 		if (CursorBufferSizeInTime - UrgentLatency <= 0) {
++ 			*NotEnoughUrgentLatencyHiding = 1;
++ 			*UrgentBurstFactorCursor = 0;
++ 		} else {
++ 			*UrgentBurstFactorCursor = CursorBufferSizeInTime
++ 					/ (CursorBufferSizeInTime - UrgentLatency);
++ 		}
++ 		if (VRatioPreY > 0) {
++ 			CursorBufferSizeInTimePre = LinesInCursorBuffer * LineTime / VRatioPreY;
++ 			if (CursorBufferSizeInTimePre - UrgentLatency <= 0) {
++ 				*NotEnoughUrgentLatencyHidingPre = 1;
++ 				*UrgentBurstFactorCursorPre = 0;
++ 			} else {
++ 				*UrgentBurstFactorCursorPre = CursorBufferSizeInTimePre
++ 						/ (CursorBufferSizeInTimePre - UrgentLatency);
++ 			}
++ 		} else {
++ 			*UrgentBurstFactorCursorPre = 1;
++ 		}
++ 	}
++
++ 	CalculateDETBufferSize(
++ 			DETBufferSizeInKByte,
++ 			SwathHeightY,
++ 			SwathHeightC,
++ 			&DETBufferSizeY,
++ 			&DETBufferSizeC);
++
++ 	/* Luma: whole swaths in the DET, converted to display time. */
++ 	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / SwathWidthY;
++ 	DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
++ 	if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
++ 		*NotEnoughUrgentLatencyHiding = 1;
++ 		*UrgentBurstFactorLuma = 0;
++ 	} else {
++ 		*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma
++ 				/ (DETBufferSizeInTimeLuma - UrgentLatency);
++ 	}
++ 	if (VRatioPreY > 0) {
++ 		DETBufferSizeInTimeLumaPre = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime
++ 				/ VRatioPreY;
++ 		if (DETBufferSizeInTimeLumaPre - UrgentLatency <= 0) {
++ 			*NotEnoughUrgentLatencyHidingPre = 1;
++ 			*UrgentBurstFactorLumaPre = 0;
++ 		} else {
++ 			*UrgentBurstFactorLumaPre = DETBufferSizeInTimeLumaPre
++ 					/ (DETBufferSizeInTimeLumaPre - UrgentLatency);
++ 		}
++ 	} else {
++ 		*UrgentBurstFactorLumaPre = 1;
++ 	}
++
++ 	if (BytePerPixelInDETC > 0) {
++ 		/*
++ 		 * Chroma width is half the luma swath width. Divide by 2.0 so the
++ 		 * half width is not truncated for odd SwathWidthY (and does not
++ 		 * become 0 for a width of 1), matching the floating-point chroma
++ 		 * width used for the same quantity in the watermark calculation.
++ 		 */
++ 		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2.0);
++ 		DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime
++ 				/ (VRatio / 2);
++ 		if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
++ 			*NotEnoughUrgentLatencyHiding = 1;
++ 			*UrgentBurstFactorChroma = 0;
++ 		} else {
++ 			*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
++ 					/ (DETBufferSizeInTimeChroma - UrgentLatency);
++ 		}
++ 		if (VRatioPreC > 0) {
++ 			DETBufferSizeInTimeChromaPre = dml_floor(LinesInDETChroma, SwathHeightC)
++ 					* LineTime / VRatioPreC;
++ 			if (DETBufferSizeInTimeChromaPre - UrgentLatency <= 0) {
++ 				*NotEnoughUrgentLatencyHidingPre = 1;
++ 				*UrgentBurstFactorChromaPre = 0;
++ 			} else {
++ 				*UrgentBurstFactorChromaPre = DETBufferSizeInTimeChromaPre
++ 						/ (DETBufferSizeInTimeChromaPre - UrgentLatency);
++ 			}
++ 		} else {
++ 			*UrgentBurstFactorChromaPre = 1;
++ 		}
++ 	}
++ }
++
++ /*
++  * Fill the per-plane line and request delivery time arrays.
++  *
++  * Line delivery time (luma and chroma, nominal and prefetch):
++  *   - ratio <= 1: swath width * DPPPerPlane / HRatio / PixelClock
++  *   - otherwise:  swath width / PSCL_THROUGHPUT / DPPCLK
++  *   where the ratio tested is VRatio (VRatio / 2 for chroma) for the nominal
++  *   values and VRatioPrefetchY / VRatioPrefetchC for the prefetch values.
++  *   Chroma uses HRatio / 2 and PSCL_THROUGHPUT_CHROMA.
++  *
++  * Request delivery time = line delivery time / requests per swath, where the
++  * request count is the swath width divided (integer division) by the 256-byte
++  * block width for dm_horz scans and by the block height otherwise.
++  *
++  * All chroma outputs are 0 for planes with BytePerPixelDETC == 0.
++  */
++ static void CalculatePixelDeliveryTimes(
++ 		unsigned int NumberOfActivePlanes,
++ 		double VRatio[],
++ 		double VRatioPrefetchY[],
++ 		double VRatioPrefetchC[],
++ 		unsigned int swath_width_luma_ub[],
++ 		unsigned int swath_width_chroma_ub[],
++ 		int DPPPerPlane[],
++ 		double HRatio[],
++ 		double PixelClock[],
++ 		double PSCL_THROUGHPUT[],
++ 		double PSCL_THROUGHPUT_CHROMA[],
++ 		double DPPCLK[],
++ 		double BytePerPixelDETC[],
++ 		enum scan_direction_class SourceScan[],
++ 		unsigned int BlockWidth256BytesY[],
++ 		unsigned int BlockHeight256BytesY[],
++ 		unsigned int BlockWidth256BytesC[],
++ 		unsigned int BlockHeight256BytesC[],
++ 		double DisplayPipeLineDeliveryTimeLuma[],
++ 		double DisplayPipeLineDeliveryTimeChroma[],
++ 		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
++ 		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
++ 		double DisplayPipeRequestDeliveryTimeLuma[],
++ 		double DisplayPipeRequestDeliveryTimeChroma[],
++ 		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
++ 		double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
++ {
++ 	uint plane;
++
++ 	/* Pass 1: time to deliver one line of each surface. */
++ 	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
++ 		DisplayPipeLineDeliveryTimeLuma[plane] = (VRatio[plane] <= 1)
++ 				? swath_width_luma_ub[plane] * DPPPerPlane[plane]
++ 						/ HRatio[plane] / PixelClock[plane]
++ 				: swath_width_luma_ub[plane]
++ 						/ PSCL_THROUGHPUT[plane] / DPPCLK[plane];
++
++ 		if (BytePerPixelDETC[plane] == 0) {
++ 			DisplayPipeLineDeliveryTimeChroma[plane] = 0;
++ 		} else {
++ 			DisplayPipeLineDeliveryTimeChroma[plane] = (VRatio[plane] / 2 <= 1)
++ 					? swath_width_chroma_ub[plane]
++ 							* DPPPerPlane[plane] / (HRatio[plane] / 2) / PixelClock[plane]
++ 					: swath_width_chroma_ub[plane]
++ 							/ PSCL_THROUGHPUT_CHROMA[plane] / DPPCLK[plane];
++ 		}
++
++ 		DisplayPipeLineDeliveryTimeLumaPrefetch[plane] = (VRatioPrefetchY[plane] <= 1)
++ 				? swath_width_luma_ub[plane]
++ 						* DPPPerPlane[plane] / HRatio[plane] / PixelClock[plane]
++ 				: swath_width_luma_ub[plane]
++ 						/ PSCL_THROUGHPUT[plane] / DPPCLK[plane];
++
++ 		if (BytePerPixelDETC[plane] == 0) {
++ 			DisplayPipeLineDeliveryTimeChromaPrefetch[plane] = 0;
++ 		} else {
++ 			DisplayPipeLineDeliveryTimeChromaPrefetch[plane] = (VRatioPrefetchC[plane] <= 1)
++ 					? swath_width_chroma_ub[plane] * DPPPerPlane[plane]
++ 							/ (HRatio[plane] / 2) / PixelClock[plane]
++ 					: swath_width_chroma_ub[plane]
++ 							/ PSCL_THROUGHPUT_CHROMA[plane] / DPPCLK[plane];
++ 		}
++ 	}
++
++ 	/* Pass 2: spread each line time over the requests that make up a swath. */
++ 	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
++ 		unsigned int BlockExtent;
++ 		double RequestsPerSwath;
++
++ 		BlockExtent = (SourceScan[plane] == dm_horz)
++ 				? BlockWidth256BytesY[plane]
++ 				: BlockHeight256BytesY[plane];
++ 		/* Integer division on purpose: both operands are unsigned int. */
++ 		RequestsPerSwath = swath_width_luma_ub[plane] / BlockExtent;
++
++ 		DisplayPipeRequestDeliveryTimeLuma[plane] =
++ 				DisplayPipeLineDeliveryTimeLuma[plane] / RequestsPerSwath;
++ 		DisplayPipeRequestDeliveryTimeLumaPrefetch[plane] =
++ 				DisplayPipeLineDeliveryTimeLumaPrefetch[plane] / RequestsPerSwath;
++
++ 		if (BytePerPixelDETC[plane] == 0) {
++ 			DisplayPipeRequestDeliveryTimeChroma[plane] = 0;
++ 			DisplayPipeRequestDeliveryTimeChromaPrefetch[plane] = 0;
++ 			continue;
++ 		}
++
++ 		BlockExtent = (SourceScan[plane] == dm_horz)
++ 				? BlockWidth256BytesC[plane]
++ 				: BlockHeight256BytesC[plane];
++ 		RequestsPerSwath = swath_width_chroma_ub[plane] / BlockExtent;
++
++ 		DisplayPipeRequestDeliveryTimeChroma[plane] =
++ 				DisplayPipeLineDeliveryTimeChroma[plane] / RequestsPerSwath;
++ 		DisplayPipeRequestDeliveryTimeChromaPrefetch[plane] =
++ 				DisplayPipeLineDeliveryTimeChromaPrefetch[plane] / RequestsPerSwath;
++ 	}
++ }
++
++static void CalculateMetaAndPTETimes(
++ unsigned int NumberOfActivePlanes,
++ bool GPUVMEnable,
++ unsigned int MetaChunkSize,
++ unsigned int MinMetaChunkSizeBytes,
++ unsigned int GPUVMMaxPageTableLevels,
++ unsigned int HTotal[],
++ double VRatio[],
++ double VRatioPrefetchY[],
++ double VRatioPrefetchC[],
++ double DestinationLinesToRequestRowInVBlank[],
++ double DestinationLinesToRequestRowInImmediateFlip[],
++ double DestinationLinesToRequestVMInVBlank[],
++ double DestinationLinesToRequestVMInImmediateFlip[],
++ bool DCCEnable[],
++ double PixelClock[],
++ double BytePerPixelDETY[],
++ double BytePerPixelDETC[],
++ enum scan_direction_class SourceScan[],
++ unsigned int dpte_row_height[],
++ unsigned int dpte_row_height_chroma[],
++ unsigned int meta_row_width[],
++ unsigned int meta_row_height[],
++ unsigned int meta_req_width[],
++ unsigned int meta_req_height[],
++ long dpte_group_bytes[],
++ unsigned int PTERequestSizeY[],
++ unsigned int PTERequestSizeC[],
++ unsigned int PixelPTEReqWidthY[],
++ unsigned int PixelPTEReqHeightY[],
++ unsigned int PixelPTEReqWidthC[],
++ unsigned int PixelPTEReqHeightC[],
++ unsigned int dpte_row_width_luma_ub[],
++ unsigned int dpte_row_width_chroma_ub[],
++ unsigned int vm_group_bytes[],
++ unsigned int dpde0_bytes_per_frame_ub_l[],
++ unsigned int dpde0_bytes_per_frame_ub_c[],
++ unsigned int meta_pte_bytes_per_frame_ub_l[],
++ unsigned int meta_pte_bytes_per_frame_ub_c[],
++ double DST_Y_PER_PTE_ROW_NOM_L[],
++ double DST_Y_PER_PTE_ROW_NOM_C[],
++ double DST_Y_PER_META_ROW_NOM_L[],
++ double TimePerMetaChunkNominal[],
++ double TimePerMetaChunkVBlank[],
++ double TimePerMetaChunkFlip[],
++ double time_per_pte_group_nom_luma[],
++ double time_per_pte_group_vblank_luma[],
++ double time_per_pte_group_flip_luma[],
++ double time_per_pte_group_nom_chroma[],
++ double time_per_pte_group_vblank_chroma[],
++ double time_per_pte_group_flip_chroma[],
++ double TimePerVMGroupVBlank[],
++ double TimePerVMGroupFlip[],
++ double TimePerVMRequestVBlank[],
++ double TimePerVMRequestFlip[])
++{
++ unsigned int meta_chunk_width;
++ unsigned int min_meta_chunk_width;
++ unsigned int meta_chunk_per_row_int;
++ unsigned int meta_row_remainder;
++ unsigned int meta_chunk_threshold;
++ unsigned int meta_chunks_per_row_ub;
++ unsigned int dpte_group_width_luma;
++ unsigned int dpte_group_width_chroma;
++ unsigned int dpte_groups_per_row_luma_ub;
++ unsigned int dpte_groups_per_row_chroma_ub;
++ unsigned int num_group_per_lower_vm_stage;
++ unsigned int num_req_per_lower_vm_stage;
++ uint k;
++
++ for (k = 0; k < NumberOfActivePlanes; ++k) {
++ if (GPUVMEnable == true) {
++ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
++ if (BytePerPixelDETC[k] == 0) {
++ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
++ } else {
++ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2);
++ }
++ } else {
++ DST_Y_PER_PTE_ROW_NOM_L[k] = 0;
++ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
++ }
++ if (DCCEnable[k] == true) {
++ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
++ } else {
++ DST_Y_PER_META_ROW_NOM_L[k] = 0;
++ }
++ }
++
++ for (k = 0; k < NumberOfActivePlanes; ++k) {
++ if (DCCEnable[k] == true) {
++ meta_chunk_width = MetaChunkSize * 1024 * 256
++ / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
++ min_meta_chunk_width = MinMetaChunkSizeBytes * 256
++ / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
++ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
++ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
++ if (SourceScan[k] == dm_horz) {
++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
++ } else {
++ meta_chunk_threshold = 2 * min_meta_chunk_width
++ - meta_req_height[k];
++ }
++ if (meta_row_remainder <= meta_chunk_threshold) {
++ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
++ } else {
++ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
++ }
++ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k]
++ / PixelClock[k] / meta_chunks_per_row_ub;
++ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k]
++ * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
++ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k]
++ * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
++ } else {
++ TimePerMetaChunkNominal[k] = 0;
++ TimePerMetaChunkVBlank[k] = 0;
++ TimePerMetaChunkFlip[k] = 0;
++ }
++ }
++
++ for (k = 0; k < NumberOfActivePlanes; ++k) {
++ if (GPUVMEnable == true) {
++ if (SourceScan[k] == dm_horz) {
++ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
++ * PixelPTEReqWidthY[k];
++ } else {
++ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
++ * PixelPTEReqHeightY[k];
++ }
++ dpte_groups_per_row_luma_ub = dml_ceil(
++ dpte_row_width_luma_ub[k] / dpte_group_width_luma,
++ 1);
++ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k]
++ / PixelClock[k] / dpte_groups_per_row_luma_ub;
++ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k]
++ * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
++ time_per_pte_group_flip_luma[k] =
++ DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k]
++ / PixelClock[k]
++ / dpte_groups_per_row_luma_ub;
++ if (BytePerPixelDETC[k] == 0) {
++ time_per_pte_group_nom_chroma[k] = 0;
++ time_per_pte_group_vblank_chroma[k] = 0;
++ time_per_pte_group_flip_chroma[k] = 0;
++ } else {
++ if (SourceScan[k] == dm_horz) {
++ dpte_group_width_chroma = dpte_group_bytes[k]
++ / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
++ } else {
++ dpte_group_width_chroma = dpte_group_bytes[k]
++ / PTERequestSizeC[k]
++ * PixelPTEReqHeightC[k];
++ }
++ dpte_groups_per_row_chroma_ub = dml_ceil(
++ dpte_row_width_chroma_ub[k]
++ / dpte_group_width_chroma,
++ 1);
++ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k]
++ * HTotal[k] / PixelClock[k]
++ / dpte_groups_per_row_chroma_ub;
++ time_per_pte_group_vblank_chroma[k] =
++ DestinationLinesToRequestRowInVBlank[k] * HTotal[k]
++ / PixelClock[k]
++ / dpte_groups_per_row_chroma_ub;
++ time_per_pte_group_flip_chroma[k] =
++ DestinationLinesToRequestRowInImmediateFlip[k]
++ * HTotal[k] / PixelClock[k]
++ / dpte_groups_per_row_chroma_ub;
++ }
++ } else {
++ time_per_pte_group_nom_luma[k] = 0;
++ time_per_pte_group_vblank_luma[k] = 0;
++ time_per_pte_group_flip_luma[k] = 0;
++ time_per_pte_group_nom_chroma[k] = 0;
++ time_per_pte_group_vblank_chroma[k] = 0;
++ time_per_pte_group_flip_chroma[k] = 0;
++ }
++ }
++
++ for (k = 0; k < NumberOfActivePlanes; ++k) {
++ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
++ if (DCCEnable[k] == false) {
++ if (BytePerPixelDETC[k] > 0) {
++ num_group_per_lower_vm_stage =
++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
++ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
++ } else {
++ num_group_per_lower_vm_stage =
++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
++ }
++ } else {
++ if (GPUVMMaxPageTableLevels == 1) {
++ if (BytePerPixelDETC[k] > 0) {
++ num_group_per_lower_vm_stage =
++ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
++ } else {
++ num_group_per_lower_vm_stage =
++ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
++ }
++ } else {
++ if (BytePerPixelDETC[k] > 0) {
++ num_group_per_lower_vm_stage =
++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
++ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
++ } else {
++ num_group_per_lower_vm_stage =
++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
++ }
++ }
++ }
++
++ if (DCCEnable[k] == false) {
++ if (BytePerPixelDETC[k] > 0) {
++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
++ / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
++ } else {
++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
++ / 64;
++ }
++ } else {
++ if (GPUVMMaxPageTableLevels == 1) {
++ if (BytePerPixelDETC[k] > 0) {
++ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
++ + meta_pte_bytes_per_frame_ub_c[k] / 64;
++ } else {
++ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
++ }
++ } else {
++ if (BytePerPixelDETC[k] > 0) {
++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
++ + dpde0_bytes_per_frame_ub_c[k] / 64
++ + meta_pte_bytes_per_frame_ub_l[k] / 64
++ + meta_pte_bytes_per_frame_ub_c[k] / 64;
++ } else {
++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
++ + meta_pte_bytes_per_frame_ub_l[k] / 64;
++ }
++ }
++ }
++
++ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k]
++ / PixelClock[k] / num_group_per_lower_vm_stage;
++ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
++ * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
++ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k]
++ * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
++ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
++ * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
++
++ if (GPUVMMaxPageTableLevels > 2) {
++ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
++ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
++ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
++ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
++ }
++
++ } else {
++ TimePerVMGroupVBlank[k] = 0;
++ TimePerVMGroupFlip[k] = 0;
++ TimePerVMRequestVBlank[k] = 0;
++ TimePerVMRequestFlip[k] = 0;
++ }
++ }
++}
++
++static double CalculateExtraLatency(
++		double UrgentRoundTripAndOutOfOrderLatency,
++		int TotalNumberOfActiveDPP,
++		int PixelChunkSizeInKByte,
++		int TotalNumberOfDCCActiveDPP,
++		int MetaChunkSize,
++		double ReturnBW,
++		bool GPUVMEnable,
++		bool HostVMEnable,
++		int NumberOfActivePlanes,
++		int NumberOfDPP[],
++		long dpte_group_bytes[],
++		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
++		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
++		int HostVMMaxPageTableLevels,
++		int HostVMCachedPageTableLevels)
++{
++	/* Returns the urgent round-trip latency plus chunk and (GPUVM only) dpte group terms, each divided by ReturnBW. */
++	double host_vm_factor = 1;	/* stays 1 unless GPUVM and HostVM are both on */
++	int host_vm_dynamic_levels = 0;	/* stays 0 unless GPUVM and HostVM are both on */
++	double extra_latency;
++	int plane;
++
++	if (GPUVMEnable && HostVMEnable) {
++		host_vm_factor =
++				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
++				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
++		host_vm_dynamic_levels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
++	}
++
++	/* (pixel chunk KB per active DPP + meta chunk per DCC DPP) * 1024 / ReturnBW */
++	extra_latency = UrgentRoundTripAndOutOfOrderLatency
++			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
++			+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
++			/ ReturnBW;
++
++	if (!GPUVMEnable)
++		return extra_latency;
++
++	/* One term per active plane: DPP count * dpte group bytes, scaled by the host VM levels and factor. */
++	for (plane = 0; plane < NumberOfActivePlanes; plane++) {
++		extra_latency += NumberOfDPP[plane] * dpte_group_bytes[plane]
++				* (1 + 8 * host_vm_dynamic_levels)
++				* host_vm_factor / ReturnBW;
++	}
++
++	return extra_latency;
++}
++
++#endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h
+new file mode 100644
+index 000000000000..fb9548a2f894
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h
+@@ -0,0 +1,32 @@
++/*
++ * Copyright 2017 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifndef __DML21_DISPLAY_MODE_VBA_H__
++#define __DML21_DISPLAY_MODE_VBA_H__
++
++void dml21_recalculate(struct display_mode_lib *mode_lib);
++void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
++
++#endif /* __DML21_DISPLAY_MODE_VBA_H__ */
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+new file mode 100644
+index 000000000000..a1f207cbb966
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+@@ -0,0 +1,1823 @@
++/*
++ * Copyright 2017 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifdef CONFIG_DRM_AMD_DC_DCN2_0
++
++#include "../display_mode_lib.h"
++#include "../display_mode_vba.h"
++#include "../dml_inline_defs.h"
++#include "display_rq_dlg_calc_21.h"
++
++/*
++ * NOTE:
++ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
++ *
++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
++ * ways. Unless there is something clearly wrong with it the code should
++ * remain as-is as it provides us with a guarantee from HW that it is correct.
++ */
++
++static void calculate_ttu_cursor(
++ struct display_mode_lib *mode_lib,
++ double *refcyc_per_req_delivery_pre_cur,
++ double *refcyc_per_req_delivery_cur,
++ double refclk_freq_in_mhz,
++ double ref_freq_to_pix_freq,
++ double hscale_pixel_rate_l,
++ double hscl_ratio,
++ double vratio_pre_l,
++ double vratio_l,
++ unsigned int cur_width,
++ enum cursor_bpp cur_bpp);
++
++static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
++{
++	/*
++	 * Bytes per element for the requested plane of source_format.
++	 * dm_444_8 yields 1 for either plane; dm_444_16/32/64 yield 0 for
++	 * chroma; dm_420_8/10 have distinct luma and chroma sizes.
++	 * Any format without a case below yields 0 for both planes.
++	 */
++	switch (source_format) {
++	case dm_444_8:
++		return 1;
++	case dm_444_16:
++		return is_chroma ? 0 : 2;
++	case dm_444_32:
++		return is_chroma ? 0 : 4;
++	case dm_444_64:
++		return is_chroma ? 0 : 8;
++	case dm_420_8:
++		return is_chroma ? 2 : 1;
++	case dm_420_10:
++		return is_chroma ? 4 : 2;
++	default:
++		/* unlisted formats */
++		break;
++	}
++
++	return 0;
++}
++
++static bool is_dual_plane(enum source_format_class source_format)
++{
++	/*
++	 * Only dm_420_8 and dm_420_10 are reported as dual plane;
++	 * every other format yields false.
++	 */
++	const bool two_planes = (source_format == dm_420_8) || (source_format == dm_420_10);
++	return two_planes;
++}
++
++static double get_refcyc_per_delivery(
++		struct display_mode_lib *mode_lib,
++		double refclk_freq_in_mhz,
++		double pclk_freq_in_mhz,
++		bool odm_combine,
++		unsigned int recout_width,
++		unsigned int hactive,
++		double vratio,
++		double hscale_pixel_rate,
++		unsigned int delivery_width,
++		unsigned int req_per_swath_ub)
++{
++	/*
++	 * Computes refclk * width / rate / req_per_swath_ub. The defaults below
++	 * cover vratio <= 1 without ODM combine; the branches swap in the rest.
++	 */
++	double width = (double) recout_width;
++	double rate = pclk_freq_in_mhz;
++	double refcyc_per_delivery;
++
++	if (vratio <= 1.0) {
++		if (odm_combine)
++			width = dml_min((double) recout_width, (double) hactive / 2.0);
++	} else {
++		width = (double) delivery_width;
++		rate = (double) hscale_pixel_rate;
++	}
++	refcyc_per_delivery = (double) refclk_freq_in_mhz * width / rate / (double) req_per_swath_ub;
++
++	dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
++	dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
++	dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
++	dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
++	dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
++	dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);
++	return refcyc_per_delivery;
++}
++
++static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
++{
++	/* Block size in bytes: 256KB or 64KB for the matching tile size, 4KB for anything else. */
++	const unsigned int kbytes = (tile_size == dm_256k_tile) ? 256 :
++			(tile_size == dm_64k_tile) ? 64 :
++			4;
++
++	return kbytes * 1024;
++}
++
++static void extract_rq_sizing_regs(
++		struct display_mode_lib *mode_lib,
++		display_data_rq_regs_st *rq_regs,
++		const display_data_rq_sizing_params_st rq_sizing)
++{
++	/*
++	 * Each register field is log2 of a byte count minus a fixed offset;
++	 * the two minimum sizes are written as 0 when their byte count is 0.
++	 */
++	dml_print("DML_DLG: %s: rq_sizing param\n", __func__);
++	print__data_rq_sizing_params_st(mode_lib, rq_sizing);
++
++	rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10;
++	rq_regs->min_chunk_size = (rq_sizing.min_chunk_bytes == 0) ?
++			0 : dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1;
++
++	rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10;
++	rq_regs->min_meta_chunk_size = (rq_sizing.min_meta_chunk_bytes == 0) ?
++			0 : dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1;
++
++	/* both PTE group sizes use an offset of 6 */
++	rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6;
++	rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6;
++}
++
++static void extract_rq_regs(
++		struct display_mode_lib *mode_lib,
++		display_rq_regs_st *rq_regs,
++		const display_rq_params_st rq_param)
++{
++	display_data_rq_regs_st *luma_regs = &rq_regs->rq_regs_l;
++	display_data_rq_regs_st *chroma_regs = &rq_regs->rq_regs_c;
++	unsigned int det_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
++	unsigned int plane1_addr = 0;
++
++	/*
++	 * Luma sizing and pte row height are always filled in; the chroma
++	 * equivalents only when rq_param.yuv420 is set.
++	 */
++	extract_rq_sizing_regs(mode_lib, luma_regs, rq_param.sizing.rq_l);
++	luma_regs->pte_row_height_linear =
++			dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height), 1) - 3;
++
++	if (rq_param.yuv420) {
++		extract_rq_sizing_regs(mode_lib, chroma_regs, rq_param.sizing.rq_c);
++		chroma_regs->pte_row_height_linear =
++				dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height), 1) - 3;
++	}
++
++	luma_regs->swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
++	chroma_regs->swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);
++
++	// FIXME: take the max between luma, chroma chunk size?
++	// okay for now, as we are setting chunk_bytes to 8kb anyways
++	rq_regs->drq_expansion_mode =
++			(rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) ? 0 : 2; //32kb
++	rq_regs->prq_expansion_mode = 1;
++	rq_regs->mrq_expansion_mode = 1;
++	rq_regs->crq_expansion_mode = 1;
++
++	/* plane1 base stays 0 unless yuv420; the byte offset is divided by 64 before it is stored */
++	if (rq_param.yuv420) {
++		if ((double) rq_param.misc.rq_l.stored_swath_bytes
++				/ (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5)
++			plane1_addr = (det_bytes / 2.0 / 64.0); // half to chroma
++		else
++			plane1_addr = dml_round_to_multiple(
++					(unsigned int) ((2.0 * det_bytes) / 3.0),
++					256,
++					0) / 64.0; // 2/3 to chroma
++	}
++
++	rq_regs->plane1_base_address = plane1_addr;
++}
++
++static void handle_det_buf_split(
++ struct display_mode_lib *mode_lib,
++ display_rq_params_st *rq_param,
++ const display_pipe_source_params_st pipe_src_param)
++{
++ unsigned int total_swath_bytes = 0;
++ unsigned int swath_bytes_l = 0;
++ unsigned int swath_bytes_c = 0;
++ unsigned int full_swath_bytes_packed_l = 0;
++ unsigned int full_swath_bytes_packed_c = 0;
++ bool req128_l = 0; // set to 1 when the swaths do not fit in the detile buffer
++ bool req128_c = 0; // never set to 1 in this function
++ bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
++ bool surf_vert = (pipe_src_param.source_scan == dm_vert);
++ unsigned int log2_swath_height_l = 0;
++ unsigned int log2_swath_height_c = 0;
++ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
++ // Writes rq_param's luma/chroma stored_swath_bytes and swath_height, derived from the full swath sizes and the detile buffer size.
++ full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
++ full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
++
++ if (rq_param->yuv420_10bpc) { // packed size: 2/3 of the bytes, passed through dml_round_to_multiple(.., 256, 1), plus 256
++ full_swath_bytes_packed_l = dml_round_to_multiple(
++ rq_param->misc.rq_l.full_swath_bytes * 2 / 3,
++ 256,
++ 1) + 256;
++ full_swath_bytes_packed_c = dml_round_to_multiple(
++ rq_param->misc.rq_c.full_swath_bytes * 2 / 3,
++ 256,
++ 1) + 256;
++ }
++
++ if (rq_param->yuv420) {
++ total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; // two luma plus two chroma swaths
++
++ if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
++ req128_l = 0;
++ req128_c = 0;
++ swath_bytes_l = full_swath_bytes_packed_l;
++ swath_bytes_c = full_swath_bytes_packed_c;
++ } else { //128b request (for luma only for yuv420 8bpc)
++ req128_l = 1;
++ req128_c = 0;
++ swath_bytes_l = full_swath_bytes_packed_l / 2;
++ swath_bytes_c = full_swath_bytes_packed_c;
++ }
++ // Note: assumption, the config that pass in will fit into
++ // the detiled buffer.
++ } else {
++ total_swath_bytes = 2 * full_swath_bytes_packed_l; // single plane: two luma swaths
++
++ if (total_swath_bytes <= detile_buf_size_in_bytes)
++ req128_l = 0;
++ else
++ req128_l = 1;
++
++ swath_bytes_l = total_swath_bytes; // NOTE(review): stores twice the packed luma swath, unlike the yuv420 branch above - confirm this is intended
++ swath_bytes_c = 0;
++ }
++ rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
++ rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;
++
++ if (surf_linear) { // linear surface: log2 height 0, i.e. swath height 1
++ log2_swath_height_l = 0;
++ log2_swath_height_c = 0;
++ } else if (!surf_vert) { // not vertical scan: log2 of the 256B block height, minus 1 when req128 is set
++ log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l;
++ log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c;
++ } else { // vertical scan: same computation using the 256B block width
++ log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l;
++ log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c;
++ }
++ rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
++ rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;
++
++ dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
++ dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
++ dml_print(
++ "DML_DLG: %s: full_swath_bytes_packed_l = %0d\n",
++ __func__,
++ full_swath_bytes_packed_l);
++ dml_print(
++ "DML_DLG: %s: full_swath_bytes_packed_c = %0d\n",
++ __func__,
++ full_swath_bytes_packed_c);
++}
++
++static void get_meta_and_pte_attr(
++ struct display_mode_lib *mode_lib,
++ display_data_rq_dlg_params_st *rq_dlg_param,
++ display_data_rq_misc_params_st *rq_misc_param,
++ display_data_rq_sizing_params_st *rq_sizing_param,
++ unsigned int vp_width,
++ unsigned int vp_height,
++ unsigned int data_pitch,
++ unsigned int meta_pitch,
++ unsigned int source_format,
++ unsigned int tiling,
++ unsigned int macro_tile_size,
++ unsigned int source_scan,
++ unsigned int hostvm_enable,
++ unsigned int is_chroma)
++{
++ bool surf_linear = (tiling == dm_sw_linear);
++ bool surf_vert = (source_scan == dm_vert);
++ // Fills rq_dlg_param, rq_misc_param and rq_sizing_param->dpte_group_bytes for one plane; is_chroma selects the chroma block size and bytes per element.
++ unsigned int bytes_per_element;
++ unsigned int bytes_per_element_y = get_bytes_per_element(
++ (enum source_format_class) (source_format),
++ false);
++ unsigned int bytes_per_element_c = get_bytes_per_element(
++ (enum source_format_class) (source_format),
++ true);
++
++ unsigned int blk256_width = 0;
++ unsigned int blk256_height = 0;
++
++ unsigned int blk256_width_y = 0;
++ unsigned int blk256_height_y = 0;
++ unsigned int blk256_width_c = 0;
++ unsigned int blk256_height_c = 0;
++ unsigned int log2_bytes_per_element;
++ unsigned int log2_blk256_width;
++ unsigned int log2_blk256_height;
++ unsigned int blk_bytes;
++ unsigned int log2_blk_bytes;
++ unsigned int log2_blk_height;
++ unsigned int log2_blk_width;
++ unsigned int log2_meta_req_bytes;
++ unsigned int log2_meta_req_height;
++ unsigned int log2_meta_req_width;
++ unsigned int meta_req_width;
++ unsigned int meta_req_height;
++ unsigned int log2_meta_row_height;
++ unsigned int meta_row_width_ub;
++ unsigned int log2_meta_chunk_bytes;
++ unsigned int log2_meta_chunk_height;
++
++ //full sized meta chunk width in unit of data elements
++ unsigned int log2_meta_chunk_width;
++ unsigned int log2_min_meta_chunk_bytes;
++ unsigned int min_meta_chunk_width;
++ unsigned int meta_chunk_width;
++ unsigned int meta_chunk_per_row_int;
++ unsigned int meta_row_remainder;
++ unsigned int meta_chunk_threshold;
++ unsigned int meta_blk_bytes;
++ unsigned int meta_blk_height;
++ unsigned int meta_blk_width;
++ unsigned int meta_surface_bytes;
++ unsigned int vmpg_bytes;
++ unsigned int meta_pte_req_per_frame_ub;
++ unsigned int meta_pte_bytes_per_frame_ub;
++ const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
++ const unsigned int dpte_buf_in_pte_reqs =
++ mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
++ const unsigned int pde_proc_buffer_size_64k_reqs =
++ mode_lib->ip.pde_proc_buffer_size_64k_reqs;
++
++ unsigned int log2_vmpg_height = 0;
++ unsigned int log2_vmpg_width = 0;
++ unsigned int log2_dpte_req_height_ptes = 0;
++ unsigned int log2_dpte_req_height = 0;
++ unsigned int log2_dpte_req_width = 0;
++ unsigned int log2_dpte_row_height_linear = 0;
++ unsigned int log2_dpte_row_height = 0;
++ unsigned int log2_dpte_group_width = 0;
++ unsigned int dpte_row_width_ub = 0;
++ unsigned int dpte_req_height = 0;
++ unsigned int dpte_req_width = 0;
++ unsigned int dpte_group_width = 0;
++ unsigned int log2_dpte_group_bytes = 0;
++ unsigned int log2_dpte_group_length = 0;
++ unsigned int pde_buf_entries;
++ bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10);
++
++ Calculate256BBlockSizes(
++ (enum source_format_class) (source_format),
++ (enum dm_swizzle_mode) (tiling),
++ bytes_per_element_y,
++ bytes_per_element_c,
++ &blk256_height_y,
++ &blk256_height_c,
++ &blk256_width_y,
++ &blk256_width_c);
++
++ if (!is_chroma) {
++ blk256_width = blk256_width_y;
++ blk256_height = blk256_height_y;
++ bytes_per_element = bytes_per_element_y;
++ } else {
++ blk256_width = blk256_width_c;
++ blk256_height = blk256_height_c;
++ bytes_per_element = bytes_per_element_c; // NOTE(review): get_bytes_per_element returns 0 for chroma of dm_444_16/32/64; it is used as a divisor below - confirm callers only pass is_chroma for 4:2:0
++ }
++
++ log2_bytes_per_element = dml_log2(bytes_per_element);
++
++ dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
++ dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
++ dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
++ dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);
++
++ log2_blk256_width = dml_log2((double) blk256_width);
++ log2_blk256_height = dml_log2((double) blk256_height);
++ blk_bytes = surf_linear ?
++ 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
++ log2_blk_bytes = dml_log2((double) blk_bytes);
++ log2_blk_height = 0;
++ log2_blk_width = 0;
++
++ // remember log rule
++ // "+" in log is multiply
++ // "-" in log is divide
++ // "/2" is like square root
++ // blk is vertical biased
++ if (tiling != dm_sw_linear)
++ log2_blk_height = log2_blk256_height
++ + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
++ else
++ log2_blk_height = 0; // blk height of 1
++
++ log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
++
++ if (!surf_vert) {
++ rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1)
++ + blk256_width;
++ rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width;
++ } else {
++ rq_dlg_param->swath_width_ub = dml_round_to_multiple(
++ vp_height - 1,
++ blk256_height,
++ 1) + blk256_height;
++ rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height;
++ }
++
++ if (!surf_vert)
++ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height
++ * bytes_per_element;
++ else
++ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width
++ * bytes_per_element;
++
++ rq_misc_param->blk256_height = blk256_height;
++ rq_misc_param->blk256_width = blk256_width;
++
++ // -------
++ // meta
++ // -------
++ log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element
++
++ // each 64b meta request for dcn is 8x8 meta elements and
++ // a meta element covers one 256b block of the data surface.
++ log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
++ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
++ - log2_meta_req_height;
++ meta_req_width = 1 << log2_meta_req_width;
++ meta_req_height = 1 << log2_meta_req_height;
++ log2_meta_row_height = 0;
++ meta_row_width_ub = 0;
++
++ // the dimensions of a meta row are meta_row_width x meta_row_height in elements.
++ // calculate upper bound of the meta_row_width
++ if (!surf_vert) {
++ log2_meta_row_height = log2_meta_req_height;
++ meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1)
++ + meta_req_width;
++ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
++ } else {
++ log2_meta_row_height = log2_meta_req_width;
++ meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1)
++ + meta_req_height;
++ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
++ }
++ rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;
++
++ rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
++
++ log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
++ log2_meta_chunk_height = log2_meta_row_height;
++
++ //full sized meta chunk width in unit of data elements
++ log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element
++ - log2_meta_chunk_height;
++ log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
++ min_meta_chunk_width = 1
++ << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element
++ - log2_meta_chunk_height);
++ meta_chunk_width = 1 << log2_meta_chunk_width;
++ meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
++ meta_row_remainder = meta_row_width_ub % meta_chunk_width;
++ meta_chunk_threshold = 0;
++ meta_blk_bytes = 4096;
++ meta_blk_height = blk256_height * 64;
++ meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; // only used by the dml_print below
++ meta_surface_bytes = meta_pitch
++ * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1)
++ + meta_blk_height) * bytes_per_element / 256;
++ vmpg_bytes = mode_lib->soc.vmm_page_size_bytes;
++ meta_pte_req_per_frame_ub = (dml_round_to_multiple(
++ meta_surface_bytes - vmpg_bytes, // NOTE(review): unsigned subtraction wraps when meta_surface_bytes < vmpg_bytes - confirm this cannot happen or is harmless
++ 8 * vmpg_bytes,
++ 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
++ meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
++ rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
++
++ dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
++ dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width);
++ dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
++ dml_print(
++ "DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n",
++ __func__,
++ meta_pte_req_per_frame_ub);
++ dml_print(
++ "DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n",
++ __func__,
++ meta_pte_bytes_per_frame_ub);
++
++ if (!surf_vert)
++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
++ else
++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
++
++ if (meta_row_remainder <= meta_chunk_threshold)
++ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
++ else
++ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
++
++ // ------
++ // dpte
++ // ------
++ if (surf_linear) {
++ log2_vmpg_height = 0; // one line high
++ } else {
++ log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
++ }
++ log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
++
++ // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
++ if (surf_linear) { //one 64B PTE request returns 8 PTEs
++ log2_dpte_req_height_ptes = 0;
++ log2_dpte_req_width = log2_vmpg_width + 3;
++ log2_dpte_req_height = 0;
++ } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
++ //one 64B req gives 8x1 PTEs for 4KB tile
++ log2_dpte_req_height_ptes = 0;
++ log2_dpte_req_width = log2_blk_width + 3;
++ log2_dpte_req_height = log2_blk_height + 0;
++ } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
++ //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
++ log2_dpte_req_height_ptes = 4;
++ log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
++ log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
++ } else { //64KB page size and must 64KB tile block
++ //one 64B req gives 8x1 PTEs for 64KB tile
++ log2_dpte_req_height_ptes = 0;
++ log2_dpte_req_width = log2_blk_width + 3;
++ log2_dpte_req_height = log2_blk_height + 0;
++ }
++
++ // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
++ // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
++ // That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
++ //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
++ //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
++ dpte_req_height = 1 << log2_dpte_req_height;
++ dpte_req_width = 1 << log2_dpte_req_width;
++
++ // calculate pitch dpte row buffer can hold
++ // round the result down to a power of two.
++ pde_buf_entries =
++ yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs;
++ if (surf_linear) {
++ unsigned int dpte_row_height;
++
++ log2_dpte_row_height_linear = dml_floor(
++ dml_log2(
++ dml_min(
++ 64 * 1024 * pde_buf_entries
++ / bytes_per_element,
++ dpte_buf_in_pte_reqs
++ * dpte_req_width)
++ / data_pitch),
++ 1);
++
++ ASSERT(log2_dpte_row_height_linear >= 3);
++
++ if (log2_dpte_row_height_linear > 7)
++ log2_dpte_row_height_linear = 7;
++
++ log2_dpte_row_height = log2_dpte_row_height_linear;
++ // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
++ // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
++ dpte_row_height = 1 << log2_dpte_row_height;
++ dpte_row_width_ub = dml_round_to_multiple(
++ data_pitch * dpte_row_height - 1,
++ dpte_req_width,
++ 1) + dpte_req_width;
++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
++ } else {
++ // the upper bound of the dpte_row_width without dependency on viewport position follows.
++ // for tiled mode, row height is the same as req height and row store up to vp size upper bound
++ if (!surf_vert) {
++ log2_dpte_row_height = log2_dpte_req_height;
++ dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1)
++ + dpte_req_width;
++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
++ } else {
++ log2_dpte_row_height =
++ (log2_blk_width < log2_dpte_req_width) ?
++ log2_blk_width : log2_dpte_req_width;
++ dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1)
++ + dpte_req_height;
++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
++ }
++ }
++ if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
++ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
++ else
++ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request
++
++ rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
++
++ // the dpte_group_bytes is reduced for the specific case of vertical
++ // access of a tile surface that has dpte request of 8x1 ptes.
++
++ if (hostvm_enable)
++ rq_sizing_param->dpte_group_bytes = 512;
++ else {
++ if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group; NOTE(review): bitwise & on 0/1 operands, same result as && here
++ rq_sizing_param->dpte_group_bytes = 512;
++ else
++ //full size
++ rq_sizing_param->dpte_group_bytes = 2048;
++ }
++
++ //since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
++ log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
++ log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests
++
++ // full sized data pte group width in elements
++ if (!surf_vert)
++ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
++ else
++ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
++
++ //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
++ if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
++ log2_dpte_group_width = log2_dpte_group_width - 1;
++
++ dpte_group_width = 1 << log2_dpte_group_width;
++
++ // since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
++ // the upper bound for the dpte groups per row is as follows.
++ rq_dlg_param->dpte_groups_per_row_ub = dml_ceil(
++ (double) dpte_row_width_ub / dpte_group_width,
++ 1);
++}
++
++/*
++ * Gather the request parameters of one surface plane (luma or chroma).
++ * Selects the viewport size and the data/meta pitches of the plane named by
++ * is_chroma, overrides the viewport extent along the access direction when
++ * ODM combine is enabled, fills in the chunk sizes and the mpte group size
++ * of rq_sizing_param and finally calls get_meta_and_pte_attr().
++ */
++static void get_surf_rq_param(
++		struct display_mode_lib *mode_lib,
++		display_data_rq_sizing_params_st *rq_sizing_param,
++		display_data_rq_dlg_params_st *rq_dlg_param,
++		display_data_rq_misc_params_st *rq_misc_param,
++		const display_pipe_params_st pipe_param,
++		bool is_chroma)
++{
++	bool mode_422 = false;	// hard-coded: the 4:2:2 path is never taken
++	unsigned int pix_per_elem = mode_422 ? 2 : 1;
++	unsigned int vp_width;
++	unsigned int vp_height;
++	unsigned int data_pitch;
++	unsigned int meta_pitch;
++
++	// FIXME check if pix_per_elem apply for both luma and chroma in 422 case
++	if (!is_chroma) {
++		vp_width = pipe_param.src.viewport_width / pix_per_elem;
++		vp_height = pipe_param.src.viewport_height;
++		data_pitch = pipe_param.src.data_pitch;
++		meta_pitch = pipe_param.src.meta_pitch;
++	} else {
++		vp_width = pipe_param.src.viewport_width_c / pix_per_elem;
++		vp_height = pipe_param.src.viewport_height_c;
++		data_pitch = pipe_param.src.data_pitch_c;
++		meta_pitch = pipe_param.src.meta_pitch_c;
++	}
++
++	if (pipe_param.dest.odm_combine) {
++		// vp access direction: non-zero when the surface is scanned vertically
++		unsigned int vert_access = (pipe_param.src.source_scan == dm_vert);
++		unsigned int hactive_half = pipe_param.dest.hactive / 2;
++		unsigned int full_src_vp_width;
++		unsigned int src_hactive_half;
++
++		// destination widths multiplied by the horizontal scale ratio of the plane
++		if (!is_chroma) {
++			full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width;
++			src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio * hactive_half;
++		} else {
++			full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width;
++			src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_half;
++		}
++
++		// the smaller value becomes the viewport extent in the access direction
++		if (vert_access) {
++			vp_height = dml_min(full_src_vp_width, src_hactive_half);
++			dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height);
++		} else {
++			vp_width = dml_min(full_src_vp_width, src_hactive_half);
++			dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width);
++		}
++
++		dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width);
++		dml_print("DML_DLG: %s: hactive_half = %d\n", __func__, hactive_half);
++		dml_print("DML_DLG: %s: src_hactive_half = %d\n", __func__, src_hactive_half);
++	}
++
++	rq_sizing_param->chunk_bytes = 8192;
++	rq_sizing_param->min_chunk_bytes =
++		(rq_sizing_param->chunk_bytes == 64 * 1024) ? 0 : 1024;
++
++	rq_sizing_param->meta_chunk_bytes = 2048;
++	rq_sizing_param->min_meta_chunk_bytes = 256;
++
++	rq_sizing_param->mpte_group_bytes = pipe_param.src.hostvm ? 512 : 2048;
++
++	get_meta_and_pte_attr(
++			mode_lib,
++			rq_dlg_param, rq_misc_param, rq_sizing_param,
++			vp_width, vp_height,
++			data_pitch, meta_pitch,
++			pipe_param.src.source_format,
++			pipe_param.src.sw_mode,
++			pipe_param.src.macro_tile_size,
++			pipe_param.src.source_scan,
++			pipe_param.src.hostvm,
++			is_chroma);
++}
++
++/*
++ * Fill *rq_param for one pipe.
++ *
++ * Flags the 4:2:0 source formats, computes the luma request parameters,
++ * computes the chroma ones too when the source format is dual plane, and
++ * then calls handle_det_buf_split() to divide the DET buffer between the
++ * two planes. The result is passed to print__rq_params_st().
++ */
++static void dml_rq_dlg_get_rq_params(
++		struct display_mode_lib *mode_lib,
++		display_rq_params_st *rq_param,
++		const display_pipe_params_st pipe_param)
++{
++	// note which 4:2:0 format, if any, the source uses
++	rq_param->yuv420_10bpc = (pipe_param.src.source_format == dm_420_10);
++	rq_param->yuv420 = (pipe_param.src.source_format == dm_420_8)
++			|| rq_param->yuv420_10bpc;
++
++	// the luma surface is always processed
++	get_surf_rq_param(mode_lib, &rq_param->sizing.rq_l, &rq_param->dlg.rq_l,
++			&rq_param->misc.rq_l, pipe_param, false);
++
++	// the chroma surface is processed for dual plane formats only
++	if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format))) {
++		get_surf_rq_param(mode_lib, &rq_param->sizing.rq_c, &rq_param->dlg.rq_c,
++				&rq_param->misc.rq_c, pipe_param, true);
++	}
++
++	// calculate how to split the det buffer space between luma and chroma
++	handle_det_buf_split(mode_lib, rq_param, pipe_param.src);
++
++	print__rq_params_st(mode_lib, *rq_param);
++}
++
++// Compute the request registers of one pipe: clear *rq_regs, derive the request
++// parameters from pipe_param and hand both to extract_rq_regs(), then print.
++void dml21_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
++		display_rq_regs_st *rq_regs, const display_pipe_params_st pipe_param)
++{
++	display_rq_params_st params = {0};
++
++	memset(rq_regs, 0, sizeof(*rq_regs));
++
++	dml_rq_dlg_get_rq_params(mode_lib, &params, pipe_param);
++	extract_rq_regs(mode_lib, rq_regs, params);
++	print__rq_regs_st(mode_lib, *rq_regs);
++}
++
++// Note: currently taken in as is.
++// It would be nice to decouple this code from the hw register implementation and to extract the code that is repeated for luma and chroma.
++static void dml_rq_dlg_get_dlg_params(
++ struct display_mode_lib *mode_lib,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx,
++ display_dlg_regs_st *disp_dlg_regs,
++ display_ttu_regs_st *disp_ttu_regs,
++ const display_rq_dlg_params_st rq_dlg_param,
++ const display_dlg_sys_params_st dlg_sys_param,
++ const bool cstate_en,
++ const bool pstate_en)
++{
++ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
++ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
++ const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
++ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
++ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
++ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
++
++ // -------------------------
++ // Section 1.15.2.1: OTG dependent Params
++ // -------------------------
++ // Timing
++ unsigned int htotal = dst->htotal;
++ // unsigned int hblank_start = dst.hblank_start; // TODO: Remove
++ unsigned int hblank_end = dst->hblank_end;
++ unsigned int vblank_start = dst->vblank_start;
++ unsigned int vblank_end = dst->vblank_end;
++ unsigned int min_vblank = mode_lib->ip.min_vblank_lines;
++
++ double dppclk_freq_in_mhz = clks->dppclk_mhz;
++ double dispclk_freq_in_mhz = clks->dispclk_mhz;
++ double refclk_freq_in_mhz = clks->refclk_mhz;
++ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
++ bool interlaced = dst->interlaced;
++
++ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
++
++ double min_dcfclk_mhz;
++ double t_calc_us;
++ double min_ttu_vblank;
++
++ double min_dst_y_ttu_vblank;
++ unsigned int dlg_vblank_start;
++ bool dual_plane;
++ bool mode_422;
++ unsigned int access_dir;
++ unsigned int vp_height_l;
++ unsigned int vp_width_l;
++ unsigned int vp_height_c;
++ unsigned int vp_width_c;
++
++ // Scaling
++ unsigned int htaps_l;
++ unsigned int htaps_c;
++ double hratio_l;
++ double hratio_c;
++ double vratio_l;
++ double vratio_c;
++ bool scl_enable;
++
++ double line_time_in_us;
++ // double vinit_l;
++ // double vinit_c;
++ // double vinit_bot_l;
++ // double vinit_bot_c;
++
++ // unsigned int swath_height_l;
++ unsigned int swath_width_ub_l;
++ // unsigned int dpte_bytes_per_row_ub_l;
++ unsigned int dpte_groups_per_row_ub_l;
++ // unsigned int meta_pte_bytes_per_frame_ub_l;
++ // unsigned int meta_bytes_per_row_ub_l;
++
++ // unsigned int swath_height_c;
++ unsigned int swath_width_ub_c;
++ // unsigned int dpte_bytes_per_row_ub_c;
++ unsigned int dpte_groups_per_row_ub_c;
++
++ unsigned int meta_chunks_per_row_ub_l;
++ unsigned int meta_chunks_per_row_ub_c;
++ unsigned int vupdate_offset;
++ unsigned int vupdate_width;
++ unsigned int vready_offset;
++
++ unsigned int dppclk_delay_subtotal;
++ unsigned int dispclk_delay_subtotal;
++ unsigned int pixel_rate_delay_subtotal;
++
++ unsigned int vstartup_start;
++ unsigned int dst_x_after_scaler;
++ unsigned int dst_y_after_scaler;
++ double line_wait;
++ double dst_y_prefetch;
++ double dst_y_per_vm_vblank;
++ double dst_y_per_row_vblank;
++ double dst_y_per_vm_flip;
++ double dst_y_per_row_flip;
++ double max_dst_y_per_vm_vblank;
++ double max_dst_y_per_row_vblank;
++ double lsw;
++ double vratio_pre_l;
++ double vratio_pre_c;
++ unsigned int req_per_swath_ub_l;
++ unsigned int req_per_swath_ub_c;
++ unsigned int meta_row_height_l;
++ unsigned int meta_row_height_c;
++ unsigned int swath_width_pixels_ub_l;
++ unsigned int swath_width_pixels_ub_c;
++ unsigned int scaler_rec_in_width_l;
++ unsigned int scaler_rec_in_width_c;
++ unsigned int dpte_row_height_l;
++ unsigned int dpte_row_height_c;
++ double hscale_pixel_rate_l;
++ double hscale_pixel_rate_c;
++ double min_hratio_fact_l;
++ double min_hratio_fact_c;
++ double refcyc_per_line_delivery_pre_l;
++ double refcyc_per_line_delivery_pre_c;
++ double refcyc_per_line_delivery_l;
++ double refcyc_per_line_delivery_c;
++
++ double refcyc_per_req_delivery_pre_l;
++ double refcyc_per_req_delivery_pre_c;
++ double refcyc_per_req_delivery_l;
++ double refcyc_per_req_delivery_c;
++
++ unsigned int full_recout_width;
++ double xfc_transfer_delay;
++ double xfc_precharge_delay;
++ double xfc_remote_surface_flip_latency;
++ double xfc_dst_y_delta_drq_limit;
++ double xfc_prefetch_margin;
++ double refcyc_per_req_delivery_pre_cur0;
++ double refcyc_per_req_delivery_cur0;
++ double refcyc_per_req_delivery_pre_cur1;
++ double refcyc_per_req_delivery_cur1;
++
++ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
++ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
++
++ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
++ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
++
++ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
++ ASSERT(ref_freq_to_pix_freq < 4.0);
++
++ disp_dlg_regs->ref_freq_to_pix_freq =
++ (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
++ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal
++ * dml_pow(2, 8));
++ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
++ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end
++ * (double) ref_freq_to_pix_freq);
++ ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
++
++ min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz;
++ t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes);
++ min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal;
++ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
++
++ disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
++ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
++
++ dml_print(
++ "DML_DLG: %s: min_dcfclk_mhz = %3.2f\n",
++ __func__,
++ min_dcfclk_mhz);
++ dml_print(
++ "DML_DLG: %s: min_ttu_vblank = %3.2f\n",
++ __func__,
++ min_ttu_vblank);
++ dml_print(
++ "DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n",
++ __func__,
++ min_dst_y_ttu_vblank);
++ dml_print(
++ "DML_DLG: %s: t_calc_us = %3.2f\n",
++ __func__,
++ t_calc_us);
++ dml_print(
++ "DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n",
++ __func__,
++ disp_dlg_regs->min_dst_y_next_start);
++ dml_print(
++ "DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n",
++ __func__,
++ ref_freq_to_pix_freq);
++
++ // -------------------------
++ // Section 1.15.2.2: Prefetch, Active and TTU
++ // -------------------------
++ // Prefetch Calc
++ // Source
++ // dcc_en = src.dcc;
++ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
++ mode_422 = 0; // FIXME
++ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
++ // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
++ // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
++ vp_height_l = src->viewport_height;
++ vp_width_l = src->viewport_width;
++ vp_height_c = src->viewport_height_c;
++ vp_width_c = src->viewport_width_c;
++
++ // Scaling
++ htaps_l = taps->htaps;
++ htaps_c = taps->htaps_c;
++ hratio_l = scl->hscl_ratio;
++ hratio_c = scl->hscl_ratio_c;
++ vratio_l = scl->vscl_ratio;
++ vratio_c = scl->vscl_ratio_c;
++ scl_enable = scl->scl_enable;
++
++ line_time_in_us = (htotal / pclk_freq_in_mhz);
++ swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub;
++ dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub;
++ swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub;
++ dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub;
++
++ meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub;
++ meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub;
++ vupdate_offset = dst->vupdate_offset;
++ vupdate_width = dst->vupdate_width;
++ vready_offset = dst->vready_offset;
++
++ dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
++ dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
++
++ if (scl_enable)
++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
++ else
++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
++
++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter
++ + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
++
++ if (dout->dsc_enable) {
++ double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ dispclk_delay_subtotal += dsc_delay;
++ }
++
++ pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz
++ + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz;
++
++ vstartup_start = dst->vstartup_start;
++ if (interlaced) {
++ if (vstartup_start / 2.0
++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
++ <= vblank_end / 2.0)
++ disp_dlg_regs->vready_after_vcount0 = 1;
++ else
++ disp_dlg_regs->vready_after_vcount0 = 0;
++ } else {
++ if (vstartup_start
++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
++ <= vblank_end)
++ disp_dlg_regs->vready_after_vcount0 = 1;
++ else
++ disp_dlg_regs->vready_after_vcount0 = 0;
++ }
++
++ // TODO: Where is this coming from?
++ if (interlaced)
++ vstartup_start = vstartup_start / 2;
++
++ // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp?
++ if (vstartup_start >= min_vblank) {
++ dml_print(
++ "WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n",
++ __func__,
++ vblank_start,
++ vblank_end);
++ dml_print(
++ "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
++ __func__,
++ vstartup_start,
++ min_vblank);
++ min_vblank = vstartup_start + 1;
++ dml_print(
++ "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
++ __func__,
++ vstartup_start,
++ min_vblank);
++ }
++
++ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
++ dml_print(
++ "DML_DLG: %s: pixel_rate_delay_subtotal = %d\n",
++ __func__,
++ pixel_rate_delay_subtotal);
++ dml_print(
++ "DML_DLG: %s: dst_x_after_scaler = %d\n",
++ __func__,
++ dst_x_after_scaler);
++ dml_print(
++ "DML_DLG: %s: dst_y_after_scaler = %d\n",
++ __func__,
++ dst_y_after_scaler);
++
++ // Lwait
++ // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
++ line_wait = mode_lib->soc.urgent_latency_pixel_data_only_us;
++ if (cstate_en)
++ line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait);
++ if (pstate_en)
++ line_wait = dml_max(
++ mode_lib->soc.dram_clock_change_latency_us
++ + mode_lib->soc.urgent_latency_pixel_data_only_us, // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
++ line_wait);
++ line_wait = line_wait / line_time_in_us;
++
++ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
++
++ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ dst_y_per_row_vblank = get_dst_y_per_row_vblank(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ max_dst_y_per_vm_vblank = 32.0;
++ max_dst_y_per_row_vblank = 16.0;
++
++ // magic!
++ if (htotal <= 75) {
++ min_vblank = 300;
++ max_dst_y_per_vm_vblank = 100.0;
++ max_dst_y_per_row_vblank = 100.0;
++ }
++
++ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
++ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
++ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
++ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
++
++ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
++ ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
++
++ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
++ lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank);
++
++ dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw);
++
++ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l);
++ dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c);
++
++ // Active
++ req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub;
++ req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub;
++ meta_row_height_l = rq_dlg_param.rq_l.meta_row_height;
++ meta_row_height_c = rq_dlg_param.rq_c.meta_row_height;
++ swath_width_pixels_ub_l = 0;
++ swath_width_pixels_ub_c = 0;
++ scaler_rec_in_width_l = 0;
++ scaler_rec_in_width_c = 0;
++ dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
++ dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
++
++ if (mode_422) {
++ swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
++ swath_width_pixels_ub_c = swath_width_ub_c * 2;
++ } else {
++ swath_width_pixels_ub_l = swath_width_ub_l * 1;
++ swath_width_pixels_ub_c = swath_width_ub_c * 1;
++ }
++
++ hscale_pixel_rate_l = 0.;
++ hscale_pixel_rate_c = 0.;
++ min_hratio_fact_l = 1.0;
++ min_hratio_fact_c = 1.0;
++
++ if (htaps_l <= 1)
++ min_hratio_fact_l = 2.0;
++ else if (htaps_l <= 6) {
++ if ((hratio_l * 2.0) > 4.0)
++ min_hratio_fact_l = 4.0;
++ else
++ min_hratio_fact_l = hratio_l * 2.0;
++ } else {
++ if (hratio_l > 4.0)
++ min_hratio_fact_l = 4.0;
++ else
++ min_hratio_fact_l = hratio_l;
++ }
++
++ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
++
++ if (htaps_c <= 1)
++ min_hratio_fact_c = 2.0;
++ else if (htaps_c <= 6) {
++ if ((hratio_c * 2.0) > 4.0)
++ min_hratio_fact_c = 4.0;
++ else
++ min_hratio_fact_c = hratio_c * 2.0;
++ } else {
++ if (hratio_c > 4.0)
++ min_hratio_fact_c = 4.0;
++ else
++ min_hratio_fact_c = hratio_c;
++ }
++
++ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
++
++ refcyc_per_line_delivery_pre_l = 0.;
++ refcyc_per_line_delivery_pre_c = 0.;
++ refcyc_per_line_delivery_l = 0.;
++ refcyc_per_line_delivery_c = 0.;
++
++ refcyc_per_req_delivery_pre_l = 0.;
++ refcyc_per_req_delivery_pre_c = 0.;
++ refcyc_per_req_delivery_l = 0.;
++ refcyc_per_req_delivery_c = 0.;
++
++ full_recout_width = 0;
++ // In ODM
++ if (src->is_hsplit) {
++ // This "hack" is only allowed (and valid) for MPC combine. In ODM
++ // combine, you MUST specify the full_recout_width...according to Oswin
++ if (dst->full_recout_width == 0 && !dst->odm_combine) {
++ dml_print(
++ "DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n",
++ __func__);
++ full_recout_width = dst->recout_width * 2; // assume half split for dcn1
++ } else
++ full_recout_width = dst->full_recout_width;
++ } else
++ full_recout_width = dst->recout_width;
++
++ // As of DCN2, mpc_combine and odm_combine are mutually exclusive
++ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_l,
++ hscale_pixel_rate_l,
++ swath_width_pixels_ub_l,
++ 1); // per line
++
++ refcyc_per_line_delivery_l = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_l,
++ hscale_pixel_rate_l,
++ swath_width_pixels_ub_l,
++ 1); // per line
++
++ dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width);
++ dml_print(
++ "DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n",
++ __func__,
++ hscale_pixel_rate_l);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_pre_l);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_l);
++
++ if (dual_plane) {
++ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_c,
++ hscale_pixel_rate_c,
++ swath_width_pixels_ub_c,
++ 1); // per line
++
++ refcyc_per_line_delivery_c = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_c,
++ hscale_pixel_rate_c,
++ swath_width_pixels_ub_c,
++ 1); // per line
++
++ dml_print(
++ "DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_pre_c);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_c);
++ }
++
++ // TTU - Luma / Chroma
++ if (access_dir) { // vertical access
++ scaler_rec_in_width_l = vp_height_l;
++ scaler_rec_in_width_c = vp_height_c;
++ } else {
++ scaler_rec_in_width_l = vp_width_l;
++ scaler_rec_in_width_c = vp_width_c;
++ }
++
++ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_l,
++ hscale_pixel_rate_l,
++ scaler_rec_in_width_l,
++ req_per_swath_ub_l); // per req
++ refcyc_per_req_delivery_l = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_l,
++ hscale_pixel_rate_l,
++ scaler_rec_in_width_l,
++ req_per_swath_ub_l); // per req
++
++ dml_print(
++ "DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_pre_l);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_l);
++
++ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
++ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
++
++ if (dual_plane) {
++ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_c,
++ hscale_pixel_rate_c,
++ scaler_rec_in_width_c,
++ req_per_swath_ub_c); // per req
++ refcyc_per_req_delivery_c = get_refcyc_per_delivery(
++ mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_c,
++ hscale_pixel_rate_c,
++ scaler_rec_in_width_c,
++ req_per_swath_ub_c); // per req
++
++ dml_print(
++ "DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_pre_c);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_c);
++
++ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
++ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
++ }
++
++ // XFC
++ xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ xfc_precharge_delay = get_xfc_precharge_delay(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency;
++ xfc_prefetch_margin = get_xfc_prefetch_margin(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++
++ // TTU - Cursor
++ refcyc_per_req_delivery_pre_cur0 = 0.0;
++ refcyc_per_req_delivery_cur0 = 0.0;
++ if (src->num_cursors > 0) {
++ calculate_ttu_cursor(
++ mode_lib,
++ &refcyc_per_req_delivery_pre_cur0,
++ &refcyc_per_req_delivery_cur0,
++ refclk_freq_in_mhz,
++ ref_freq_to_pix_freq,
++ hscale_pixel_rate_l,
++ scl->hscl_ratio,
++ vratio_pre_l,
++ vratio_l,
++ src->cur0_src_width,
++ (enum cursor_bpp) (src->cur0_bpp));
++ }
++
++ refcyc_per_req_delivery_pre_cur1 = 0.0;
++ refcyc_per_req_delivery_cur1 = 0.0;
++ if (src->num_cursors > 1) {
++ calculate_ttu_cursor(
++ mode_lib,
++ &refcyc_per_req_delivery_pre_cur1,
++ &refcyc_per_req_delivery_cur1,
++ refclk_freq_in_mhz,
++ ref_freq_to_pix_freq,
++ hscale_pixel_rate_l,
++ scl->hscl_ratio,
++ vratio_pre_l,
++ vratio_l,
++ src->cur1_src_width,
++ (enum cursor_bpp) (src->cur1_bpp));
++ }
++
++ // TTU - Misc
++ // all hard-coded
++
++ // Assignment to register structures
++ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
++ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
++ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
++ disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
++
++ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
++ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
++
++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
++
++ disp_dlg_regs->refcyc_per_pte_group_vblank_l =
++ (unsigned int) (dst_y_per_row_vblank * (double) htotal
++ * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
++
++ if (dual_plane) {
++ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank
++ * (double) htotal * ref_freq_to_pix_freq
++ / (double) dpte_groups_per_row_ub_c);
++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c
++ < (unsigned int)dml_pow(2, 13));
++ }
++
++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
++ (unsigned int) (dst_y_per_row_vblank * (double) htotal
++ * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
++ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
++
++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c =
++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
++
++ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
++ * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
++ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
++ * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
++
++ if (dual_plane) {
++ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip
++ * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
++ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip
++ * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
++ }
++
++ disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
++ disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
++ disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;;
++ disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;;
++
++ // Clamp to max for now
++ if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
++
++ if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
++
++ if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
++
++ if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
++ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l
++ / (double) vratio_l * dml_pow(2, 2));
++ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
++
++ if (dual_plane) {
++ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c
++ / (double) vratio_c * dml_pow(2, 2));
++ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
++ dml_print(
++ "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
++ __func__,
++ disp_dlg_regs->dst_y_per_pte_row_nom_c,
++ (unsigned int)dml_pow(2, 17) - 1);
++ }
++ }
++
++ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l
++ / (double) vratio_l * dml_pow(2, 2));
++ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
++
++ disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
++
++ dml_print(
++ "DML: Trow: %fus\n",
++ line_time_in_us * (double)dpte_row_height_l / (double)vratio_l);
++
++ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l
++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
++ / (double) dpte_groups_per_row_ub_l);
++ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l
++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
++ / (double) meta_chunks_per_row_ub_l);
++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
++
++ if (dual_plane) {
++ disp_dlg_regs->refcyc_per_pte_group_nom_c =
++ (unsigned int) ((double) dpte_row_height_c / (double) vratio_c
++ * (double) htotal * ref_freq_to_pix_freq
++ / (double) dpte_groups_per_row_ub_c);
++ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
++
++ // TODO: Is this the right calculation? Does htotal need to be halved?
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c =
++ (unsigned int) ((double) meta_row_height_c / (double) vratio_c
++ * (double) htotal * ref_freq_to_pix_freq
++ / (double) meta_chunks_per_row_ub_c);
++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
++ }
++
++ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(
++ refcyc_per_line_delivery_pre_l, 1);
++ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(
++ refcyc_per_line_delivery_l, 1);
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13));
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
++
++ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(
++ refcyc_per_line_delivery_pre_c, 1);
++ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(
++ refcyc_per_line_delivery_c, 1);
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13));
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
++
++ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
++ disp_dlg_regs->dst_y_offset_cur0 = 0;
++ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
++ disp_dlg_regs->dst_y_offset_cur1 = 0;
++
++ disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay;
++ disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay;
++ disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency;
++ disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(
++ xfc_prefetch_margin * refclk_freq_in_mhz, 1);
++
++ // slave has to have this value also set to off
++ if (src->xfc_enable && !src->xfc_slave)
++ disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1);
++ else
++ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
++
++ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 =
++ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 =
++ (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1
++ * dml_pow(2, 10));
++ disp_ttu_regs->qos_level_low_wm = 0;
++ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
++ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal
++ * ref_freq_to_pix_freq);
++ ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
++
++ disp_ttu_regs->qos_level_flip = 14;
++ disp_ttu_regs->qos_level_fixed_l = 8;
++ disp_ttu_regs->qos_level_fixed_c = 8;
++ disp_ttu_regs->qos_level_fixed_cur0 = 8;
++ disp_ttu_regs->qos_ramp_disable_l = 0;
++ disp_ttu_regs->qos_ramp_disable_c = 0;
++ disp_ttu_regs->qos_ramp_disable_cur0 = 0;
++
++ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
++ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
++
++ print__ttu_regs_st(mode_lib, *disp_ttu_regs);
++ print__dlg_regs_st(mode_lib, *disp_dlg_regs);
++}
++
++void dml21_rq_dlg_get_dlg_reg( // Gathers system-level values into dlg_sys_param, then calls dml_rq_dlg_get_rq_params and dml_rq_dlg_get_dlg_params for e2e_pipe_param[pipe_idx]
++ struct display_mode_lib *mode_lib,
++ display_dlg_regs_st *dlg_regs, // forwarded unchanged to dml_rq_dlg_get_dlg_params below
++ display_ttu_regs_st *ttu_regs, // forwarded unchanged to dml_rq_dlg_get_dlg_params below
++ display_e2e_pipe_params_st *e2e_pipe_param, // indexed with pipe_idx below
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx, // NOTE(review): not range-checked against num_pipes in this function
++ const bool cstate_en,
++ const bool pstate_en,
++ const bool vm_en, // not referenced in this function body
++ const bool ignore_viewport_pos, // not referenced in this function body
++ const bool immediate_flip_support) // not referenced in this function body
++{
++ display_rq_params_st rq_param = {0}; // passed by address to dml_rq_dlg_get_rq_params below
++ display_dlg_sys_params_st dlg_sys_param = {0}; // passed by value to dml_rq_dlg_get_dlg_params below
++
++ // Get watermark and Tex (t_extra_us), plus the other system-level values, into dlg_sys_param.
++ dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes);
++ dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes);
++ dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes);
++ dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency // NOTE(review): no guard here against deepsleep_dcfclk_mhz being 0
++ / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
++
++ print__dlg_sys_params_st(mode_lib, dlg_sys_param);
++
++ // system parameter calculation done
++
++ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); // NOTE(review): pipe_idx is unsigned int but is printed with %d
++ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe);
++ dml_rq_dlg_get_dlg_params(
++ mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx,
++ dlg_regs,
++ ttu_regs,
++ rq_param.dlg, // only the .dlg member of rq_param is forwarded
++ dlg_sys_param,
++ cstate_en,
++ pstate_en);
++ dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); // NOTE(review): same %d / unsigned int mismatch as above
++}
++
++void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param) // Zeroes *arb_param, then sets three fixed values; mode_lib is not referenced in the body
++{
++ memset(arb_param, 0, sizeof(*arb_param)); // every field not assigned below is left zeroed
++ arb_param->max_req_outstanding = 256;
++ arb_param->min_req_outstanding = 68;
++ arb_param->sat_level_us = 60;
++}
++
++static void calculate_ttu_cursor( // Computes the two cursor request-delivery values written through the pointer arguments; both stay 0.0 when cur_width is 0
++ struct display_mode_lib *mode_lib,
++ double *refcyc_per_req_delivery_pre_cur, // output; formula chosen by vratio_pre_l
++ double *refcyc_per_req_delivery_cur, // output; formula chosen by vratio_l
++ double refclk_freq_in_mhz,
++ double ref_freq_to_pix_freq,
++ double hscale_pixel_rate_l,
++ double hscl_ratio, // used as a divisor below; NOTE(review): no zero check in this function
++ double vratio_pre_l,
++ double vratio_l,
++ unsigned int cur_width, // asserted to be at most 256
++ enum cursor_bpp cur_bpp) // dm_cur_2bit selects the 2-bit path; every other value takes the 32-bit path
++{
++ unsigned int cur_src_width = cur_width;
++ unsigned int cur_req_size = 0;
++ unsigned int cur_req_width = 0;
++ double cur_width_ub = 0.0;
++ double cur_req_per_width = 0.0;
++ double hactive_cur = 0.0;
++
++ ASSERT(cur_src_width <= 256);
++
++ *refcyc_per_req_delivery_pre_cur = 0.0; // defaults, kept when cur_src_width is 0
++ *refcyc_per_req_delivery_cur = 0.0;
++ if (cur_src_width > 0) {
++ unsigned int cur_bit_per_pixel = 0;
++
++ if (cur_bpp == dm_cur_2bit) {
++ cur_req_size = 64; // byte
++ cur_bit_per_pixel = 2;
++ } else { // 32bit
++ cur_bit_per_pixel = 32;
++ if (cur_src_width >= 1 && cur_src_width <= 16)
++ cur_req_size = 64;
++ else if (cur_src_width >= 17 && cur_src_width <= 31)
++ cur_req_size = 128;
++ else // remaining widths (32 and above)
++ cur_req_size = 256;
++ }
++
++ cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); // request bytes / bytes per pixel; the double result is converted to unsigned int on assignment
++ cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) // presumably rounds up to a whole number of requests (dml_ceil is defined elsewhere) - TODO confirm
++ * (double) cur_req_width;
++ cur_req_per_width = cur_width_ub / (double) cur_req_width; // divisor for both outputs below
++ hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
++
++ if (vratio_pre_l <= 1.0) { // ratio <= 1.0: derive from hactive_cur and ref_freq_to_pix_freq
++ *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
++ / (double) cur_req_per_width;
++ } else { // ratio > 1.0: derive from refclk_freq_in_mhz, cur_src_width and hscale_pixel_rate_l
++ *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz
++ * (double) cur_src_width / hscale_pixel_rate_l
++ / (double) cur_req_per_width;
++ }
++
++ ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
++
++ if (vratio_l <= 1.0) { // same two formulas as above, selected by vratio_l instead of vratio_pre_l
++ *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq
++ / (double) cur_req_per_width;
++ } else {
++ *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz
++ * (double) cur_src_width / hscale_pixel_rate_l
++ / (double) cur_req_per_width;
++ }
++
++ dml_print(
++ "DML_DLG: %s: cur_req_width = %d\n", // NOTE(review): cur_req_width is unsigned int; %u would match its type
++ __func__,
++ cur_req_width);
++ dml_print(
++ "DML_DLG: %s: cur_width_ub = %3.2f\n",
++ __func__,
++ cur_width_ub);
++ dml_print(
++ "DML_DLG: %s: cur_req_per_width = %3.2f\n",
++ __func__,
++ cur_req_per_width);
++ dml_print(
++ "DML_DLG: %s: hactive_cur = %3.2f\n",
++ __func__,
++ hactive_cur);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n",
++ __func__,
++ *refcyc_per_req_delivery_pre_cur);
++ dml_print(
++ "DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n",
++ __func__,
++ *refcyc_per_req_delivery_cur);
++
++ ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); // checked only after the debug prints above
++ }
++}
++
++#endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
+new file mode 100644
+index 000000000000..83e95f8cbff2
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
+@@ -0,0 +1,73 @@
++/*
++ * Copyright 2017 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifndef __DML21_DISPLAY_RQ_DLG_CALC_H__
++#define __DML21_DISPLAY_RQ_DLG_CALC_H__
++
++#include "../dml_common_defs.h"
++#include "../display_rq_dlg_helpers.h"
++
++struct display_mode_lib;
++
++
++// Function: dml21_rq_dlg_get_rq_reg
++// Main entry point for test to get the register values out of this DML class.
++// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate
++// and then populate the rq_regs struct
++// Input:
++// pipe_param - pipe source configuration (e.g. vp, pitch, etc.)
++// Output:
++// rq_regs - struct that holds all the RQ register field values.
++// See also: <display_rq_regs_st>
++void dml21_rq_dlg_get_rq_reg(
++ struct display_mode_lib *mode_lib,
++ display_rq_regs_st *rq_regs,
++ const display_pipe_params_st pipe_param);
++
++// Function: dml21_rq_dlg_get_dlg_reg
++// Calculate and return DLG and TTU register struct given the system setting
++// Output:
++// dlg_regs - output DLG register struct
++// ttu_regs - output DLG TTU register struct
++// Input:
++// e2e_pipe_param - "compacted" array of e2e pipe param struct
++// num_pipes - num of active "pipe" or "route"
++// pipe_idx - index of the e2e_pipe_param entry that corresponds to this dlg
++// cstate_en - 0: when calculating min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
++// Added for legacy or unrealistic timing tests.
++void dml21_rq_dlg_get_dlg_reg(
++ struct display_mode_lib *mode_lib,
++ display_dlg_regs_st *dlg_regs,
++ display_ttu_regs_st *ttu_regs,
++ display_e2e_pipe_params_st *e2e_pipe_param,
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx,
++ const bool cstate_en,
++ const bool pstate_en,
++ const bool vm_en, // not referenced by the definition in display_rq_dlg_calc_21.c
++ const bool ignore_viewport_pos, // not referenced by the definition in display_rq_dlg_calc_21.c
++ const bool immediate_flip_support); // not referenced by the definition in display_rq_dlg_calc_21.c
++
++
++#endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+index 870716e3c132..d8c59aa356b6 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+@@ -38,6 +38,9 @@ enum dml_project {
+ DML_PROJECT_NAVI10,
+ DML_PROJECT_NAVI10v2,
+ #endif
++#ifdef CONFIG_DRM_AMD_DC_DCN2_1
++ DML_PROJECT_DCN21,
++#endif
+ };
+
+ struct display_mode_lib;
+--
+2.17.1
+