diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch | 8131 |
1 files changed, 8131 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch new file mode 100644 index 00000000..ed24a880 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3642-drm-amd-display-Add-Renoir-DML.patch @@ -0,0 +1,8131 @@ +From 847b6b99af63802da2d600fe136ba893f75b0288 Mon Sep 17 00:00:00 2001 +From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com> +Date: Fri, 26 Jul 2019 16:52:06 -0400 +Subject: [PATCH 3642/4256] drm/amd/display: Add Renoir DML + +DML provides the display configuration validation as provided +by the hw teams. + +Acked-by: Harry Wentland <harry.wentland@amd.com> +Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 + + .../dc/dml/dcn21/display_mode_vba_21.c | 6123 +++++++++++++++++ + .../dc/dml/dcn21/display_mode_vba_21.h | 32 + + .../dc/dml/dcn21/display_rq_dlg_calc_21.c | 1823 +++++ + .../dc/dml/dcn21/display_rq_dlg_calc_21.h | 73 + + .../drm/amd/display/dc/dml/display_mode_lib.h | 3 + + 6 files changed, 8058 insertions(+) + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c + create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h + +diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile +index 95fd2beca80c..b267c0fc64e7 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile ++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile +@@ -45,6 +45,10 @@ CFLAGS_display_rq_dlg_calc_20.o := $(dml_ccflags) + CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags) + CFLAGS_display_rq_dlg_calc_20v2.o := 
$(dml_ccflags) + endif ++ifdef CONFIG_DRM_AMD_DC_DCN2_1 ++CFLAGS_display_mode_vba_21.o := $(dml_ccflags) ++CFLAGS_display_rq_dlg_calc_21.o := $(dml_ccflags) ++endif + ifdef CONFIG_DRM_AMD_DCN3AG + CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags) + endif +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +new file mode 100644 +index 000000000000..456cd0e3289c +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +@@ -0,0 +1,6123 @@ ++/* ++ * Copyright 2017 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: AMD ++ * ++ */ ++ ++#ifdef CONFIG_DRM_AMD_DC_DCN2_0 ++ ++#include "../display_mode_lib.h" ++#include "../dml_inline_defs.h" ++#include "../display_mode_vba.h" ++#include "display_mode_vba_21.h" ++ ++ ++/* ++ * NOTE: ++ * This file is gcc-parsable HW gospel, coming straight from HW engineers. ++ * ++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd ++ * ways. Unless there is something clearly wrong with it the code should ++ * remain as-is as it provides us with a guarantee from HW that it is correct. ++ */ ++ ++typedef unsigned int uint; ++ ++typedef struct { ++ double DPPCLK; ++ double DISPCLK; ++ double PixelClock; ++ double DCFCLKDeepSleep; ++ unsigned int DPPPerPlane; ++ bool ScalerEnabled; ++ enum scan_direction_class SourceScan; ++ unsigned int BlockWidth256BytesY; ++ unsigned int BlockHeight256BytesY; ++ unsigned int BlockWidth256BytesC; ++ unsigned int BlockHeight256BytesC; ++ unsigned int InterlaceEnable; ++ unsigned int NumberOfCursors; ++ unsigned int VBlank; ++ unsigned int HTotal; ++} Pipe; ++ ++typedef struct { ++ bool Enable; ++ unsigned int MaxPageTableLevels; ++ unsigned int CachedPageTableLevels; ++} HostVM; ++ ++#define BPP_INVALID 0 ++#define BPP_BLENDED_PIPE 0xffffffff ++ ++static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); ++static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( ++ struct display_mode_lib *mode_lib); ++static unsigned int dscceComputeDelay( ++ unsigned int bpc, ++ double bpp, ++ unsigned int sliceWidth, ++ unsigned int numSlices, ++ enum output_format_class pixelFormat); ++static unsigned int dscComputeDelay(enum output_format_class pixelFormat); ++// Super monster function with some 45 arguments ++static bool CalculatePrefetchSchedule( ++ struct display_mode_lib *mode_lib, ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ double
PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ Pipe *myPipe, ++ unsigned int DSCDelay, ++ double DPPCLKDelaySubtotal, ++ double DPPCLKDelaySCL, ++ double DPPCLKDelaySCLLBOnly, ++ double DPPCLKDelayCNVCFormater, ++ double DPPCLKDelayCNVCCursor, ++ double DISPCLKDelaySubtotal, ++ unsigned int ScalerRecoutWidth, ++ enum output_format_class OutputFormat, ++ unsigned int MaxInterDCNTileRepeaters, ++ unsigned int VStartup, ++ unsigned int MaxVStartup, ++ unsigned int GPUVMPageTableLevels, ++ bool GPUVMEnable, ++ HostVM *myHostVM, ++ bool DynamicMetadataEnable, ++ int DynamicMetadataLinesBeforeActiveRequired, ++ unsigned int DynamicMetadataTransmittedBytes, ++ bool DCCEnable, ++ double UrgentLatency, ++ double UrgentExtraLatency, ++ double TCalc, ++ unsigned int PDEAndMetaPTEBytesFrame, ++ unsigned int MetaRowByte, ++ unsigned int PixelPTEBytesPerRow, ++ double PrefetchSourceLinesY, ++ unsigned int SwathWidthY, ++ double BytePerPixelDETY, ++ double VInitPreFillY, ++ unsigned int MaxNumSwathY, ++ double PrefetchSourceLinesC, ++ double BytePerPixelDETC, ++ double VInitPreFillC, ++ unsigned int MaxNumSwathC, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ double TWait, ++ bool XFCEnabled, ++ double XFCRemoteSurfaceFlipDelay, ++ bool ProgressiveToInterlaceUnitInOPP, ++ double *DSTXAfterScaler, ++ double *DSTYAfterScaler, ++ double *DestinationLinesForPrefetch, ++ double *PrefetchBandwidth, ++ double *DestinationLinesToRequestVMInVBlank, ++ double *DestinationLinesToRequestRowInVBlank, ++ double *VRatioPrefetchY, ++ double *VRatioPrefetchC, ++ double *RequiredPrefetchPixDataBWLuma, ++ double *RequiredPrefetchPixDataBWChroma, ++ unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, ++ double *Tno_bw, ++ double *prefetch_vmrow_bw, ++ unsigned int *swath_width_luma_ub, ++ unsigned int *swath_width_chroma_ub, ++ unsigned int *VUpdateOffsetPix, ++ double *VUpdateWidthPix, ++ double *VReadyOffsetPix); ++static double 
RoundToDFSGranularityUp(double Clock, double VCOSpeed); ++static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); ++static double CalculateDCCConfiguration( ++ bool DCCEnabled, ++ bool DCCProgrammingAssumesScanDirectionUnknown, ++ unsigned int ViewportWidth, ++ unsigned int ViewportHeight, ++ double DETBufferSize, ++ unsigned int RequestHeight256Byte, ++ unsigned int SwathHeight, ++ enum dm_swizzle_mode TilingFormat, ++ unsigned int BytePerPixel, ++ enum scan_direction_class ScanOrientation, ++ unsigned int *MaxUncompressedBlock, ++ unsigned int *MaxCompressedBlock, ++ unsigned int *Independent64ByteBlock); ++static double CalculatePrefetchSourceLines( ++ struct display_mode_lib *mode_lib, ++ double VRatio, ++ double vtaps, ++ bool Interlace, ++ bool ProgressiveToInterlaceUnitInOPP, ++ unsigned int SwathHeight, ++ unsigned int ViewportYStart, ++ double *VInitPreFill, ++ unsigned int *MaxNumSwath); ++static unsigned int CalculateVMAndRowBytes( ++ struct display_mode_lib *mode_lib, ++ bool DCCEnable, ++ unsigned int BlockHeight256Bytes, ++ unsigned int BlockWidth256Bytes, ++ enum source_format_class SourcePixelFormat, ++ unsigned int SurfaceTiling, ++ unsigned int BytePerPixel, ++ enum scan_direction_class ScanDirection, ++ unsigned int ViewportWidth, ++ unsigned int ViewportHeight, ++ unsigned int SwathWidthY, ++ bool GPUVMEnable, ++ bool HostVMEnable, ++ unsigned int HostVMMaxPageTableLevels, ++ unsigned int HostVMCachedPageTableLevels, ++ unsigned int VMMPageSize, ++ unsigned int PTEBufferSizeInRequests, ++ unsigned int Pitch, ++ unsigned int DCCMetaPitch, ++ unsigned int *MacroTileWidth, ++ unsigned int *MetaRowByte, ++ unsigned int *PixelPTEBytesPerRow, ++ bool *PTEBufferSizeNotExceeded, ++ unsigned int *dpte_row_width_ub, ++ unsigned int *dpte_row_height, ++ unsigned int *MetaRequestWidth, ++ unsigned int *MetaRequestHeight, ++ unsigned int *meta_row_width, ++ unsigned int *meta_row_height, ++ unsigned int *vm_group_bytes, ++ long 
*dpte_group_bytes, ++ unsigned int *PixelPTEReqWidth, ++ unsigned int *PixelPTEReqHeight, ++ unsigned int *PTERequestSize, ++ unsigned int *DPDE0BytesFrame, ++ unsigned int *MetaPTEBytesFrame); ++ ++static double CalculateTWait( ++ unsigned int PrefetchMode, ++ double DRAMClockChangeLatency, ++ double UrgentLatency, ++ double SREnterPlusExitTime); ++static double CalculateRemoteSurfaceFlipDelay( ++ struct display_mode_lib *mode_lib, ++ double VRatio, ++ double SwathWidth, ++ double Bpp, ++ double LineTime, ++ double XFCTSlvVupdateOffset, ++ double XFCTSlvVupdateWidth, ++ double XFCTSlvVreadyOffset, ++ double XFCXBUFLatencyTolerance, ++ double XFCFillBWOverhead, ++ double XFCSlvChunkSize, ++ double XFCBusTransportTime, ++ double TCalc, ++ double TWait, ++ double *SrcActiveDrainRate, ++ double *TInitXFill, ++ double *TslvChk); ++static void CalculateActiveRowBandwidth( ++ bool GPUVMEnable, ++ enum source_format_class SourcePixelFormat, ++ double VRatio, ++ bool DCCEnable, ++ double LineTime, ++ unsigned int MetaRowByteLuma, ++ unsigned int MetaRowByteChroma, ++ unsigned int meta_row_height_luma, ++ unsigned int meta_row_height_chroma, ++ unsigned int PixelPTEBytesPerRowLuma, ++ unsigned int PixelPTEBytesPerRowChroma, ++ unsigned int dpte_row_height_luma, ++ unsigned int dpte_row_height_chroma, ++ double *meta_row_bw, ++ double *dpte_row_bw); ++static void CalculateFlipSchedule( ++ struct display_mode_lib *mode_lib, ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ double UrgentExtraLatency, ++ double UrgentLatency, ++ unsigned int GPUVMMaxPageTableLevels, ++ bool HostVMEnable, ++ unsigned int HostVMMaxPageTableLevels, ++ unsigned int HostVMCachedPageTableLevels, ++ bool GPUVMEnable, ++ double PDEAndMetaPTEBytesPerFrame, ++ double MetaRowBytes, ++ double DPTEBytesPerRow, ++ double BandwidthAvailableForImmediateFlip, ++ unsigned int 
TotImmediateFlipBytes, ++ enum source_format_class SourcePixelFormat, ++ double LineTime, ++ double VRatio, ++ double Tno_bw, ++ bool DCCEnable, ++ unsigned int dpte_row_height, ++ unsigned int meta_row_height, ++ unsigned int dpte_row_height_chroma, ++ unsigned int meta_row_height_chroma, ++ double *DestinationLinesToRequestVMInImmediateFlip, ++ double *DestinationLinesToRequestRowInImmediateFlip, ++ double *final_flip_bw, ++ bool *ImmediateFlipSupportedForPipe); ++static double CalculateWriteBackDelay( ++ enum source_format_class WritebackPixelFormat, ++ double WritebackHRatio, ++ double WritebackVRatio, ++ unsigned int WritebackLumaHTaps, ++ unsigned int WritebackLumaVTaps, ++ unsigned int WritebackChromaHTaps, ++ unsigned int WritebackChromaVTaps, ++ unsigned int WritebackDestinationWidth); ++static void CalculateWatermarksAndDRAMSpeedChangeSupport( ++ struct display_mode_lib *mode_lib, ++ unsigned int PrefetchMode, ++ unsigned int NumberOfActivePlanes, ++ unsigned int MaxLineBufferLines, ++ unsigned int LineBufferSize, ++ unsigned int DPPOutputBufferPixels, ++ double DETBufferSizeInKByte, ++ unsigned int WritebackInterfaceLumaBufferSize, ++ unsigned int WritebackInterfaceChromaBufferSize, ++ double DCFCLK, ++ double UrgentOutOfOrderReturn, ++ double ReturnBW, ++ bool GPUVMEnable, ++ long dpte_group_bytes[], ++ unsigned int MetaChunkSize, ++ double UrgentLatency, ++ double ExtraLatency, ++ double WritebackLatency, ++ double WritebackChunkSize, ++ double SOCCLK, ++ double DRAMClockChangeLatency, ++ double SRExitTime, ++ double SREnterPlusExitTime, ++ double DCFCLKDeepSleep, ++ int DPPPerPlane[], ++ bool DCCEnable[], ++ double DPPCLK[], ++ unsigned int SwathWidthSingleDPPY[], ++ unsigned int SwathHeightY[], ++ double ReadBandwidthPlaneLuma[], ++ unsigned int SwathHeightC[], ++ double ReadBandwidthPlaneChroma[], ++ unsigned int LBBitPerPixel[], ++ unsigned int SwathWidthY[], ++ double HRatio[], ++ unsigned int vtaps[], ++ unsigned int VTAPsChroma[], ++ double 
VRatio[], ++ unsigned int HTotal[], ++ double PixelClock[], ++ unsigned int BlendingAndTiming[], ++ double BytePerPixelDETY[], ++ double BytePerPixelDETC[], ++ bool WritebackEnable[], ++ enum source_format_class WritebackPixelFormat[], ++ double WritebackDestinationWidth[], ++ double WritebackDestinationHeight[], ++ double WritebackSourceHeight[], ++ enum clock_change_support *DRAMClockChangeSupport, ++ double *UrgentWatermark, ++ double *WritebackUrgentWatermark, ++ double *DRAMClockChangeWatermark, ++ double *WritebackDRAMClockChangeWatermark, ++ double *StutterExitWatermark, ++ double *StutterEnterPlusExitWatermark, ++ double *MinActiveDRAMClockChangeLatencySupported); ++static void CalculateDCFCLKDeepSleep( ++ struct display_mode_lib *mode_lib, ++ unsigned int NumberOfActivePlanes, ++ double BytePerPixelDETY[], ++ double BytePerPixelDETC[], ++ double VRatio[], ++ unsigned int SwathWidthY[], ++ int DPPPerPlane[], ++ double HRatio[], ++ double PixelClock[], ++ double PSCL_THROUGHPUT[], ++ double PSCL_THROUGHPUT_CHROMA[], ++ double DPPCLK[], ++ double *DCFCLKDeepSleep); ++static void CalculateDETBufferSize( ++ double DETBufferSizeInKByte, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ double *DETBufferSizeY, ++ double *DETBufferSizeC); ++static void CalculateUrgentBurstFactor( ++ unsigned int DETBufferSizeInKByte, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ unsigned int SwathWidthY, ++ double LineTime, ++ double UrgentLatency, ++ double CursorBufferSize, ++ unsigned int CursorWidth, ++ unsigned int CursorBPP, ++ double VRatio, ++ double VRatioPreY, ++ double VRatioPreC, ++ double BytePerPixelInDETY, ++ double BytePerPixelInDETC, ++ double *UrgentBurstFactorCursor, ++ double *UrgentBurstFactorCursorPre, ++ double *UrgentBurstFactorLuma, ++ double *UrgentBurstFactorLumaPre, ++ double *UrgentBurstFactorChroma, ++ double *UrgentBurstFactorChromaPre, ++ unsigned int *NotEnoughUrgentLatencyHiding, ++ unsigned int 
*NotEnoughUrgentLatencyHidingPre); ++ ++static void CalculatePixelDeliveryTimes( ++ unsigned int NumberOfActivePlanes, ++ double VRatio[], ++ double VRatioPrefetchY[], ++ double VRatioPrefetchC[], ++ unsigned int swath_width_luma_ub[], ++ unsigned int swath_width_chroma_ub[], ++ int DPPPerPlane[], ++ double HRatio[], ++ double PixelClock[], ++ double PSCL_THROUGHPUT[], ++ double PSCL_THROUGHPUT_CHROMA[], ++ double DPPCLK[], ++ double BytePerPixelDETC[], ++ enum scan_direction_class SourceScan[], ++ unsigned int BlockWidth256BytesY[], ++ unsigned int BlockHeight256BytesY[], ++ unsigned int BlockWidth256BytesC[], ++ unsigned int BlockHeight256BytesC[], ++ double DisplayPipeLineDeliveryTimeLuma[], ++ double DisplayPipeLineDeliveryTimeChroma[], ++ double DisplayPipeLineDeliveryTimeLumaPrefetch[], ++ double DisplayPipeLineDeliveryTimeChromaPrefetch[], ++ double DisplayPipeRequestDeliveryTimeLuma[], ++ double DisplayPipeRequestDeliveryTimeChroma[], ++ double DisplayPipeRequestDeliveryTimeLumaPrefetch[], ++ double DisplayPipeRequestDeliveryTimeChromaPrefetch[]); ++ ++static void CalculateMetaAndPTETimes( ++ unsigned int NumberOfActivePlanes, ++ bool GPUVMEnable, ++ unsigned int MetaChunkSize, ++ unsigned int MinMetaChunkSizeBytes, ++ unsigned int GPUVMMaxPageTableLevels, ++ unsigned int HTotal[], ++ double VRatio[], ++ double VRatioPrefetchY[], ++ double VRatioPrefetchC[], ++ double DestinationLinesToRequestRowInVBlank[], ++ double DestinationLinesToRequestRowInImmediateFlip[], ++ double DestinationLinesToRequestVMInVBlank[], ++ double DestinationLinesToRequestVMInImmediateFlip[], ++ bool DCCEnable[], ++ double PixelClock[], ++ double BytePerPixelDETY[], ++ double BytePerPixelDETC[], ++ enum scan_direction_class SourceScan[], ++ unsigned int dpte_row_height[], ++ unsigned int dpte_row_height_chroma[], ++ unsigned int meta_row_width[], ++ unsigned int meta_row_height[], ++ unsigned int meta_req_width[], ++ unsigned int meta_req_height[], ++ long dpte_group_bytes[], ++ 
unsigned int PTERequestSizeY[], ++ unsigned int PTERequestSizeC[], ++ unsigned int PixelPTEReqWidthY[], ++ unsigned int PixelPTEReqHeightY[], ++ unsigned int PixelPTEReqWidthC[], ++ unsigned int PixelPTEReqHeightC[], ++ unsigned int dpte_row_width_luma_ub[], ++ unsigned int dpte_row_width_chroma_ub[], ++ unsigned int vm_group_bytes[], ++ unsigned int dpde0_bytes_per_frame_ub_l[], ++ unsigned int dpde0_bytes_per_frame_ub_c[], ++ unsigned int meta_pte_bytes_per_frame_ub_l[], ++ unsigned int meta_pte_bytes_per_frame_ub_c[], ++ double DST_Y_PER_PTE_ROW_NOM_L[], ++ double DST_Y_PER_PTE_ROW_NOM_C[], ++ double DST_Y_PER_META_ROW_NOM_L[], ++ double TimePerMetaChunkNominal[], ++ double TimePerMetaChunkVBlank[], ++ double TimePerMetaChunkFlip[], ++ double time_per_pte_group_nom_luma[], ++ double time_per_pte_group_vblank_luma[], ++ double time_per_pte_group_flip_luma[], ++ double time_per_pte_group_nom_chroma[], ++ double time_per_pte_group_vblank_chroma[], ++ double time_per_pte_group_flip_chroma[], ++ double TimePerVMGroupVBlank[], ++ double TimePerVMGroupFlip[], ++ double TimePerVMRequestVBlank[], ++ double TimePerVMRequestFlip[]); ++ ++static double CalculateExtraLatency( ++ double UrgentRoundTripAndOutOfOrderLatency, ++ int TotalNumberOfActiveDPP, ++ int PixelChunkSizeInKByte, ++ int TotalNumberOfDCCActiveDPP, ++ int MetaChunkSize, ++ double ReturnBW, ++ bool GPUVMEnable, ++ bool HostVMEnable, ++ int NumberOfActivePlanes, ++ int NumberOfDPP[], ++ long dpte_group_bytes[], ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ int HostVMMaxPageTableLevels, ++ int HostVMCachedPageTableLevels); ++ ++void dml21_recalculate(struct display_mode_lib *mode_lib) ++{ ++ ModeSupportAndSystemConfiguration(mode_lib); ++ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); ++ DisplayPipeConfiguration(mode_lib); ++ 
++ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); ++} ++ ++static unsigned int dscceComputeDelay( ++ unsigned int bpc, ++ double bpp, ++ unsigned int sliceWidth, ++ unsigned int numSlices, ++ enum output_format_class pixelFormat) ++{ ++ // valid bpc = source bits per component in the set of {8, 10, 12} ++ // valid bpp = increments of 1/16 of a bit ++ // min = 6/7/8 in N420/N422/444, respectively ++ // max = such that compression is 1:1 ++ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) ++ //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4} ++ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} ++ ++ // fixed value ++ unsigned int rcModelSize = 8192; ++ ++ // N422/N420 operate at 2 pixels per clock ++ unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l, ++ Delay, pixels; ++ ++ if (pixelFormat == dm_n422 || pixelFormat == dm_420) ++ pixelsPerClock = 2; ++ // #all other modes operate at 1 pixel per clock ++ else ++ pixelsPerClock = 1; ++ ++ //initial transmit delay as per PPS ++ initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); ++ ++ //compute ssm delay ++ if (bpc == 8) ++ D = 81; ++ else if (bpc == 10) ++ D = 89; ++ else ++ D = 113; ++ ++ //divide by pixel per cycle to compute slice width as seen by DSC ++ w = sliceWidth / pixelsPerClock; ++ ++ //422 mode has an additional cycle of delay ++ if (pixelFormat == dm_s422) ++ S = 1; ++ else ++ S = 0; ++ ++ //main calculation for the dscce ++ ix = initalXmitDelay + 45; ++ wx = (w + 2) / 3; ++ p = 3 * wx - w; ++ l0 = ix / w; ++ a = ix + p * l0; ++ ax = (a + 2) / 3 + D + 6 + 1; ++ l = (ax + wx - 1) / wx; ++ if ((ix % w) == 0 && p != 0) ++ lstall = 1; ++ else ++ lstall = 0; ++ Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22; ++ ++ //dsc processes 3
pixel containers per cycle and a container can contain 1 or 2 pixels ++ pixels = Delay * 3 * pixelsPerClock; ++ return pixels; ++} ++ ++static unsigned int dscComputeDelay(enum output_format_class pixelFormat) ++{ ++ unsigned int Delay = 0; ++ ++ if (pixelFormat == dm_420) { ++ // sfr ++ Delay = Delay + 2; ++ // dsccif ++ Delay = Delay + 0; ++ // dscc - input deserializer ++ Delay = Delay + 3; ++ // dscc gets pixels every other cycle ++ Delay = Delay + 2; ++ // dscc - input cdc fifo ++ Delay = Delay + 12; ++ // dscc gets pixels every other cycle ++ Delay = Delay + 13; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output cdc fifo ++ Delay = Delay + 7; ++ // dscc gets pixels every other cycle ++ Delay = Delay + 3; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output serializer ++ Delay = Delay + 1; ++ // sft ++ Delay = Delay + 1; ++ } else if (pixelFormat == dm_n422) { ++ // sfr ++ Delay = Delay + 2; ++ // dsccif ++ Delay = Delay + 1; ++ // dscc - input deserializer ++ Delay = Delay + 5; ++ // dscc - input cdc fifo ++ Delay = Delay + 25; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output cdc fifo ++ Delay = Delay + 10; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output serializer ++ Delay = Delay + 1; ++ // sft ++ Delay = Delay + 1; ++ } else { ++ // sfr ++ Delay = Delay + 2; ++ // dsccif ++ Delay = Delay + 0; ++ // dscc - input deserializer ++ Delay = Delay + 3; ++ // dscc - input cdc fifo ++ Delay = Delay + 12; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // dscc - output cdc fifo ++ Delay = Delay + 7; ++ // dscc - output serializer ++ Delay = Delay + 1; ++ // dscc - cdc uncertainty ++ Delay = Delay + 2; ++ // sft ++ Delay = Delay + 1; ++ } ++ ++ return Delay; ++} ++ ++static bool CalculatePrefetchSchedule( ++ struct display_mode_lib *mode_lib, ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ double 
PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ Pipe *myPipe, ++ unsigned int DSCDelay, ++ double DPPCLKDelaySubtotal, ++ double DPPCLKDelaySCL, ++ double DPPCLKDelaySCLLBOnly, ++ double DPPCLKDelayCNVCFormater, ++ double DPPCLKDelayCNVCCursor, ++ double DISPCLKDelaySubtotal, ++ unsigned int ScalerRecoutWidth, ++ enum output_format_class OutputFormat, ++ unsigned int MaxInterDCNTileRepeaters, ++ unsigned int VStartup, ++ unsigned int MaxVStartup, ++ unsigned int GPUVMPageTableLevels, ++ bool GPUVMEnable, ++ HostVM *myHostVM, ++ bool DynamicMetadataEnable, ++ int DynamicMetadataLinesBeforeActiveRequired, ++ unsigned int DynamicMetadataTransmittedBytes, ++ bool DCCEnable, ++ double UrgentLatency, ++ double UrgentExtraLatency, ++ double TCalc, ++ unsigned int PDEAndMetaPTEBytesFrame, ++ unsigned int MetaRowByte, ++ unsigned int PixelPTEBytesPerRow, ++ double PrefetchSourceLinesY, ++ unsigned int SwathWidthY, ++ double BytePerPixelDETY, ++ double VInitPreFillY, ++ unsigned int MaxNumSwathY, ++ double PrefetchSourceLinesC, ++ double BytePerPixelDETC, ++ double VInitPreFillC, ++ unsigned int MaxNumSwathC, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ double TWait, ++ bool XFCEnabled, ++ double XFCRemoteSurfaceFlipDelay, ++ bool ProgressiveToInterlaceUnitInOPP, ++ double *DSTXAfterScaler, ++ double *DSTYAfterScaler, ++ double *DestinationLinesForPrefetch, ++ double *PrefetchBandwidth, ++ double *DestinationLinesToRequestVMInVBlank, ++ double *DestinationLinesToRequestRowInVBlank, ++ double *VRatioPrefetchY, ++ double *VRatioPrefetchC, ++ double *RequiredPrefetchPixDataBWLuma, ++ double *RequiredPrefetchPixDataBWChroma, ++ unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, ++ double *Tno_bw, ++ double *prefetch_vmrow_bw, ++ unsigned int *swath_width_luma_ub, ++ unsigned int *swath_width_chroma_ub, ++ unsigned int *VUpdateOffsetPix, ++ double *VUpdateWidthPix, ++ double *VReadyOffsetPix) ++{ ++ bool MyError = false; 
++ unsigned int DPPCycles, DISPCLKCycles; ++ double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; ++ double Tdm, LineTime, Tsetup; ++ double dst_y_prefetch_equ; ++ double Tsw_oto; ++ double prefetch_bw_oto; ++ double Tvm_oto; ++ double Tr0_oto; ++ double Tvm_oto_lines; ++ double Tr0_oto_lines; ++ double Tsw_oto_lines; ++ double dst_y_prefetch_oto; ++ double TimeForFetchingMetaPTE = 0; ++ double TimeForFetchingRowInVBlank = 0; ++ double LinesToRequestPrefetchPixelData = 0; ++ double HostVMInefficiencyFactor; ++ unsigned int HostVMDynamicLevels; ++ ++ if (GPUVMEnable == true && myHostVM->Enable == true) { ++ HostVMInefficiencyFactor = ++ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData ++ / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; ++ HostVMDynamicLevels = myHostVM->MaxPageTableLevels ++ - myHostVM->CachedPageTableLevels; ++ } else { ++ HostVMInefficiencyFactor = 1; ++ HostVMDynamicLevels = 0; ++ } ++ ++ if (myPipe->ScalerEnabled) ++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; ++ else ++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; ++ ++ DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; ++ ++ DISPCLKCycles = DISPCLKDelaySubtotal; ++ ++ if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) ++ return true; ++ ++ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK ++ + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; ++ ++ if (myPipe->DPPPerPlane > 1) ++ *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; ++ ++ if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) ++ *DSTYAfterScaler = 1; ++ else ++ *DSTYAfterScaler = 0; ++ ++ DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler; ++ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); ++ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) 
(*DSTYAfterScaler * myPipe->HTotal)); ++ ++ *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1); ++ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK); ++ *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime) ++ * myPipe->PixelClock; ++ ++ *VReadyOffsetPix = dml_max( ++ 150.0 / myPipe->DPPCLK, ++ TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK) ++ * myPipe->PixelClock; ++ ++ Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock; ++ ++ LineTime = (double) myPipe->HTotal / myPipe->PixelClock; ++ ++ if (DynamicMetadataEnable) { ++ double Tdmbf, Tdmec, Tdmsks; ++ ++ Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); ++ Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK; ++ Tdmec = LineTime; ++ if (DynamicMetadataLinesBeforeActiveRequired == -1) ++ Tdmsks = myPipe->VBlank * LineTime / 2.0; ++ else ++ Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; ++ if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) ++ Tdmsks = Tdmsks / 2; ++ if (VStartup * LineTime ++ < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { ++ MyError = true; ++ *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait ++ + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; ++ } else ++ *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; ++ } else ++ Tdm = 0; ++ ++ if (GPUVMEnable) { ++ if (GPUVMPageTableLevels >= 3) ++ *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1); ++ else ++ *Tno_bw = 0; ++ } else if (!DCCEnable) ++ *Tno_bw = LineTime; ++ else ++ *Tno_bw = LineTime / 4; ++ ++ dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime ++ - (Tsetup + Tdm) / LineTime ++ - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); ++ ++ Tsw_oto = 
dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; ++ ++ if (myPipe->SourceScan == dm_horz) { ++ *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY; ++ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC; ++ } else { ++ *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY; ++ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC; ++ } ++ ++ prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto; ++ ++ ++ if (GPUVMEnable == true) { ++ Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, ++ dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), ++ LineTime / 4.0)); ++ } else ++ Tvm_oto = LineTime / 4.0; ++ ++ if ((GPUVMEnable == true || DCCEnable == true)) { ++ Tr0_oto = dml_max( ++ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, ++ dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4))); ++ } else ++ Tr0_oto = (LineTime - Tvm_oto) / 2.0; ++ ++ Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0; ++ Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0; ++ Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0; ++ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75; ++ ++ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; ++ ++ if (dst_y_prefetch_oto < dst_y_prefetch_equ) ++ *DestinationLinesForPrefetch = dst_y_prefetch_oto; ++ else ++ *DestinationLinesForPrefetch = dst_y_prefetch_equ; ++ ++ dml_print("DML: VStartup: %d\n", VStartup); ++ dml_print("DML: TCalc: %f\n", 
TCalc); ++ dml_print("DML: TWait: %f\n", TWait); ++ dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); ++ dml_print("DML: LineTime: %f\n", LineTime); ++ dml_print("DML: Tsetup: %f\n", Tsetup); ++ dml_print("DML: Tdm: %f\n", Tdm); ++ dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler); ++ dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler); ++ dml_print("DML: HTotal: %d\n", myPipe->HTotal); ++ ++ *PrefetchBandwidth = 0; ++ *DestinationLinesToRequestVMInVBlank = 0; ++ *DestinationLinesToRequestRowInVBlank = 0; ++ *VRatioPrefetchY = 0; ++ *VRatioPrefetchC = 0; ++ *RequiredPrefetchPixDataBWLuma = 0; ++ if (*DestinationLinesForPrefetch > 1) { ++ double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte ++ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor ++ + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) ++ + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) ++ / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); ++ ++ double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * ++ HostVMInefficiencyFactor + PrefetchSourceLinesY * ++ *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + ++ PrefetchSourceLinesC * *swath_width_chroma_ub * ++ dml_ceil(BytePerPixelDETC, 2)) / ++ (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 * ++ UrgentLatency * (1 + HostVMDynamicLevels)); ++ ++ double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow ++ * HostVMInefficiencyFactor + PrefetchSourceLinesY * ++ *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + ++ PrefetchSourceLinesC * *swath_width_chroma_ub * ++ dml_ceil(BytePerPixelDETC, 2)) / ++ (*DestinationLinesForPrefetch * LineTime - ++ UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels ++ * (HostVMDynamicLevels + 1) - 1)); ++ ++ double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub * ++ dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * ++ 
*swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / ++ (*DestinationLinesForPrefetch * LineTime - ++ UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels ++ * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency * ++ (1 + HostVMDynamicLevels)); ++ ++ if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) { ++ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw); ++ } ++ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) { ++ *PrefetchBandwidth = PrefetchBandwidth1; ++ } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) { ++ *PrefetchBandwidth = PrefetchBandwidth2; ++ } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) { ++ *PrefetchBandwidth = PrefetchBandwidth3; ++ } else { ++ *PrefetchBandwidth = PrefetchBandwidth4; ++ } ++ ++ if (GPUVMEnable) { ++ TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 
*PrefetchBandwidth, ++ dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4)); ++ } else { ++// 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor ++// so if this needs to be reinstated, then it should be officially done in the VBA code as well. ++// if (mode_lib->NumberOfCursors > 0 || XFCEnabled) ++ TimeForFetchingMetaPTE = LineTime / 4; ++// else ++// TimeForFetchingMetaPTE = 0.0; ++ } ++ ++ if ((GPUVMEnable == true || DCCEnable == true)) { ++ TimeForFetchingRowInVBlank = ++ dml_max( ++ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) ++ / *PrefetchBandwidth, ++ dml_max( ++ UrgentLatency * (1 + HostVMDynamicLevels), ++ dml_max( ++ (LineTime ++ - TimeForFetchingMetaPTE) / 2.0, ++ LineTime ++ / 4.0))); ++ } else { ++// See note above dated 5/30/2018 ++// if (NumberOfCursors > 0 || XFCEnabled) ++ TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0; ++// else // TODO: Did someone else add this?? ++// TimeForFetchingRowInVBlank = 0.0; ++ } ++ ++ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; ++ ++ *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; ++ ++ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch ++// See note above dated 5/30/2018 ++// - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? ++ - ((GPUVMEnable || DCCEnable) ? ++ (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : ++ 0.0); // TODO: Did someone else add this?? 
++ ++ if (LinesToRequestPrefetchPixelData > 0) { ++ ++ *VRatioPrefetchY = (double) PrefetchSourceLinesY ++ / LinesToRequestPrefetchPixelData; ++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); ++ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { ++ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { ++ *VRatioPrefetchY = ++ dml_max( ++ (double) PrefetchSourceLinesY ++ / LinesToRequestPrefetchPixelData, ++ (double) MaxNumSwathY ++ * SwathHeightY ++ / (LinesToRequestPrefetchPixelData ++ - (VInitPreFillY ++ - 3.0) ++ / 2.0)); ++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); ++ } else { ++ MyError = true; ++ *VRatioPrefetchY = 0; ++ } ++ } ++ ++ *VRatioPrefetchC = (double) PrefetchSourceLinesC ++ / LinesToRequestPrefetchPixelData; ++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); ++ ++ if ((SwathHeightC > 4)) { ++ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { ++ *VRatioPrefetchC = ++ dml_max( ++ *VRatioPrefetchC, ++ (double) MaxNumSwathC ++ * SwathHeightC ++ / (LinesToRequestPrefetchPixelData ++ - (VInitPreFillC ++ - 3.0) ++ / 2.0)); ++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); ++ } else { ++ MyError = true; ++ *VRatioPrefetchC = 0; ++ } ++ } ++ ++ *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane ++ * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData ++ * dml_ceil(BytePerPixelDETY, 1) ++ * *swath_width_luma_ub / LineTime; ++ *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane ++ * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData ++ * dml_ceil(BytePerPixelDETC, 2) ++ * *swath_width_chroma_ub / LineTime; ++ } else { ++ MyError = true; ++ *VRatioPrefetchY = 0; ++ *VRatioPrefetchC = 0; ++ *RequiredPrefetchPixDataBWLuma = 0; ++ *RequiredPrefetchPixDataBWChroma = 0; ++ } ++ ++ dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE); ++ dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank); ++ dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - 
TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank); ++ dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime); ++ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); ++ ++ } else { ++ MyError = true; ++ } ++ ++ { ++ double prefetch_vm_bw; ++ double prefetch_row_bw; ++ ++ if (PDEAndMetaPTEBytesFrame == 0) { ++ prefetch_vm_bw = 0; ++ } else if (*DestinationLinesToRequestVMInVBlank > 0) { ++ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); ++ } else { ++ prefetch_vm_bw = 0; ++ MyError = true; ++ } ++ if (MetaRowByte + PixelPTEBytesPerRow == 0) { ++ prefetch_row_bw = 0; ++ } else if (*DestinationLinesToRequestRowInVBlank > 0) { ++ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); ++ } else { ++ prefetch_row_bw = 0; ++ MyError = true; ++ } ++ ++ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); ++ } ++ ++ if (MyError) { ++ *PrefetchBandwidth = 0; ++ TimeForFetchingMetaPTE = 0; ++ TimeForFetchingRowInVBlank = 0; ++ *DestinationLinesToRequestVMInVBlank = 0; ++ *DestinationLinesToRequestRowInVBlank = 0; ++ *DestinationLinesForPrefetch = 0; ++ LinesToRequestPrefetchPixelData = 0; ++ *VRatioPrefetchY = 0; ++ *VRatioPrefetchC = 0; ++ *RequiredPrefetchPixDataBWLuma = 0; ++ *RequiredPrefetchPixDataBWChroma = 0; ++ } ++ ++ return MyError; ++} ++ ++static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) ++{ ++ return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); ++} ++ ++static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) ++{ ++ return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); ++} ++ ++static double CalculateDCCConfiguration( ++ bool DCCEnabled, ++ bool DCCProgrammingAssumesScanDirectionUnknown, ++ unsigned int ViewportWidth, ++ unsigned int ViewportHeight, ++ double DETBufferSize, ++ 
unsigned int RequestHeight256Byte, ++ unsigned int SwathHeight, ++ enum dm_swizzle_mode TilingFormat, ++ unsigned int BytePerPixel, ++ enum scan_direction_class ScanOrientation, ++ unsigned int *MaxUncompressedBlock, ++ unsigned int *MaxCompressedBlock, ++ unsigned int *Independent64ByteBlock) ++{ ++ double MaximumDCCCompressionSurface = 0.0; ++ enum { ++ REQ_256Bytes, ++ REQ_128BytesNonContiguous, ++ REQ_128BytesContiguous, ++ REQ_NA ++ } Request = REQ_NA; ++ ++ if (DCCEnabled == true) { ++ if (DCCProgrammingAssumesScanDirectionUnknown == true) { ++ if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel ++ && DETBufferSize ++ >= 256 / RequestHeight256Byte ++ * ViewportHeight) { ++ Request = REQ_256Bytes; ++ } else if ((DETBufferSize ++ < RequestHeight256Byte * ViewportWidth * BytePerPixel ++ && (BytePerPixel == 2 || BytePerPixel == 4)) ++ || (DETBufferSize ++ < 256 / RequestHeight256Byte ++ * ViewportHeight ++ && BytePerPixel == 8 ++ && (TilingFormat == dm_sw_4kb_d ++ || TilingFormat ++ == dm_sw_4kb_d_x ++ || TilingFormat ++ == dm_sw_var_d ++ || TilingFormat ++ == dm_sw_var_d_x ++ || TilingFormat ++ == dm_sw_64kb_d ++ || TilingFormat ++ == dm_sw_64kb_d_x ++ || TilingFormat ++ == dm_sw_64kb_d_t ++ || TilingFormat ++ == dm_sw_64kb_r_x))) { ++ Request = REQ_128BytesNonContiguous; ++ } else { ++ Request = REQ_128BytesContiguous; ++ } ++ } else { ++ if (BytePerPixel == 1) { ++ if (ScanOrientation == dm_vert || SwathHeight == 16) { ++ Request = REQ_256Bytes; ++ } else { ++ Request = REQ_128BytesContiguous; ++ } ++ } else if (BytePerPixel == 2) { ++ if ((ScanOrientation == dm_vert && SwathHeight == 16) || (ScanOrientation != dm_vert && SwathHeight == 8)) { ++ Request = REQ_256Bytes; ++ } else if (ScanOrientation == dm_vert) { ++ Request = REQ_128BytesContiguous; ++ } else { ++ Request = REQ_128BytesNonContiguous; ++ } ++ } else if (BytePerPixel == 4) { ++ if (SwathHeight == 8) { ++ Request = REQ_256Bytes; ++ } else if (ScanOrientation == dm_vert) { ++ 
Request = REQ_128BytesContiguous; ++ } else { ++ Request = REQ_128BytesNonContiguous; ++ } ++ } else if (BytePerPixel == 8) { ++ if (TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x ++ || TilingFormat == dm_sw_var_d ++ || TilingFormat == dm_sw_var_d_x ++ || TilingFormat == dm_sw_64kb_d ++ || TilingFormat == dm_sw_64kb_d_x ++ || TilingFormat == dm_sw_64kb_d_t ++ || TilingFormat == dm_sw_64kb_r_x) { ++ if ((ScanOrientation == dm_vert && SwathHeight == 8) ++ || (ScanOrientation != dm_vert ++ && SwathHeight == 4)) { ++ Request = REQ_256Bytes; ++ } else if (ScanOrientation != dm_vert) { ++ Request = REQ_128BytesContiguous; ++ } else { ++ Request = REQ_128BytesNonContiguous; ++ } ++ } else { ++ if (ScanOrientation != dm_vert || SwathHeight == 8) { ++ Request = REQ_256Bytes; ++ } else { ++ Request = REQ_128BytesContiguous; ++ } ++ } ++ } ++ } ++ } else { ++ Request = REQ_NA; ++ } ++ ++ if (Request == REQ_256Bytes) { ++ *MaxUncompressedBlock = 256; ++ *MaxCompressedBlock = 256; ++ *Independent64ByteBlock = false; ++ MaximumDCCCompressionSurface = 4.0; ++ } else if (Request == REQ_128BytesContiguous) { ++ *MaxUncompressedBlock = 128; ++ *MaxCompressedBlock = 128; ++ *Independent64ByteBlock = false; ++ MaximumDCCCompressionSurface = 2.0; ++ } else if (Request == REQ_128BytesNonContiguous) { ++ *MaxUncompressedBlock = 256; ++ *MaxCompressedBlock = 64; ++ *Independent64ByteBlock = true; ++ MaximumDCCCompressionSurface = 4.0; ++ } else { ++ *MaxUncompressedBlock = 0; ++ *MaxCompressedBlock = 0; ++ *Independent64ByteBlock = 0; ++ MaximumDCCCompressionSurface = 0.0; ++ } ++ ++ return MaximumDCCCompressionSurface; ++} ++ ++static double CalculatePrefetchSourceLines( ++ struct display_mode_lib *mode_lib, ++ double VRatio, ++ double vtaps, ++ bool Interlace, ++ bool ProgressiveToInterlaceUnitInOPP, ++ unsigned int SwathHeight, ++ unsigned int ViewportYStart, ++ double *VInitPreFill, ++ unsigned int *MaxNumSwath) ++{ ++ unsigned int MaxPartialSwath; ++ ++ if 
(ProgressiveToInterlaceUnitInOPP) ++ *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); ++ else ++ *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); ++ ++ if (!mode_lib->vba.IgnoreViewportPositioning) { ++ ++ *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; ++ ++ if (*VInitPreFill > 1.0) ++ MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; ++ else ++ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) ++ % SwathHeight; ++ MaxPartialSwath = dml_max(1U, MaxPartialSwath); ++ ++ } else { ++ ++ if (ViewportYStart != 0) ++ dml_print( ++ "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); ++ ++ *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); ++ ++ if (*VInitPreFill > 1.0) ++ MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; ++ else ++ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) ++ % SwathHeight; ++ } ++ ++ return *MaxNumSwath * SwathHeight + MaxPartialSwath; ++} ++ ++static unsigned int CalculateVMAndRowBytes( ++ struct display_mode_lib *mode_lib, ++ bool DCCEnable, ++ unsigned int BlockHeight256Bytes, ++ unsigned int BlockWidth256Bytes, ++ enum source_format_class SourcePixelFormat, ++ unsigned int SurfaceTiling, ++ unsigned int BytePerPixel, ++ enum scan_direction_class ScanDirection, ++ unsigned int ViewportWidth, ++ unsigned int ViewportHeight, ++ unsigned int SwathWidth, ++ bool GPUVMEnable, ++ bool HostVMEnable, ++ unsigned int HostVMMaxPageTableLevels, ++ unsigned int HostVMCachedPageTableLevels, ++ unsigned int VMMPageSize, ++ unsigned int PTEBufferSizeInRequests, ++ unsigned int Pitch, ++ unsigned int DCCMetaPitch, ++ unsigned int *MacroTileWidth, ++ unsigned int *MetaRowByte, ++ unsigned int *PixelPTEBytesPerRow, ++ bool *PTEBufferSizeNotExceeded, ++ unsigned int *dpte_row_width_ub, ++ unsigned int *dpte_row_height, ++ unsigned int 
*MetaRequestWidth, ++ unsigned int *MetaRequestHeight, ++ unsigned int *meta_row_width, ++ unsigned int *meta_row_height, ++ unsigned int *vm_group_bytes, ++ long *dpte_group_bytes, ++ unsigned int *PixelPTEReqWidth, ++ unsigned int *PixelPTEReqHeight, ++ unsigned int *PTERequestSize, ++ unsigned int *DPDE0BytesFrame, ++ unsigned int *MetaPTEBytesFrame) ++{ ++ unsigned int MPDEBytesFrame; ++ unsigned int DCCMetaSurfaceBytes; ++ unsigned int MacroTileSizeBytes; ++ unsigned int MacroTileHeight; ++ unsigned int ExtraDPDEBytesFrame; ++ unsigned int PDEAndMetaPTEBytesFrame; ++ unsigned int PixelPTEReqHeightPTEs; ++ ++ if (DCCEnable == true) { ++ *MetaRequestHeight = 8 * BlockHeight256Bytes; ++ *MetaRequestWidth = 8 * BlockWidth256Bytes; ++ if (ScanDirection == dm_horz) { ++ *meta_row_height = *MetaRequestHeight; ++ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) ++ + *MetaRequestWidth; ++ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; ++ } else { ++ *meta_row_height = *MetaRequestWidth; ++ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) ++ + *MetaRequestHeight; ++ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; ++ } ++ if (ScanDirection == dm_horz) { ++ DCCMetaSurfaceBytes = DCCMetaPitch ++ * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) ++ + 64 * BlockHeight256Bytes) * BytePerPixel ++ / 256; ++ } else { ++ DCCMetaSurfaceBytes = DCCMetaPitch ++ * (dml_ceil( ++ (double) ViewportHeight - 1, ++ 64 * BlockHeight256Bytes) ++ + 64 * BlockHeight256Bytes) * BytePerPixel ++ / 256; ++ } ++ if (GPUVMEnable == true) { ++ *MetaPTEBytesFrame = (dml_ceil( ++ (double) (DCCMetaSurfaceBytes - VMMPageSize) ++ / (8 * VMMPageSize), ++ 1) + 1) * 64; ++ MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2); ++ } else { ++ *MetaPTEBytesFrame = 0; ++ MPDEBytesFrame = 0; ++ } ++ } else { ++ *MetaPTEBytesFrame = 0; ++ 
MPDEBytesFrame = 0; ++ *MetaRowByte = 0; ++ } ++ ++ if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { ++ MacroTileSizeBytes = 256; ++ MacroTileHeight = BlockHeight256Bytes; ++ } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x ++ || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { ++ MacroTileSizeBytes = 4096; ++ MacroTileHeight = 4 * BlockHeight256Bytes; ++ } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t ++ || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d ++ || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x ++ || SurfaceTiling == dm_sw_64kb_r_x) { ++ MacroTileSizeBytes = 65536; ++ MacroTileHeight = 16 * BlockHeight256Bytes; ++ } else { ++ MacroTileSizeBytes = 262144; ++ MacroTileHeight = 32 * BlockHeight256Bytes; ++ } ++ *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; ++ ++ if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) { ++ if (ScanDirection == dm_horz) { ++ *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1); ++ } else { ++ *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1); ++ } ++ ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3); ++ } else { ++ *DPDE0BytesFrame = 0; ++ ExtraDPDEBytesFrame = 0; ++ } ++ ++ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame ++ + ExtraDPDEBytesFrame; ++ ++ if (HostVMEnable == true) { ++ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)); ++ } 
++ ++ if (GPUVMEnable == true) { ++ double FractionOfPTEReturnDrop; ++ ++ if (SurfaceTiling == dm_sw_linear) { ++ PixelPTEReqHeightPTEs = 1; ++ *PixelPTEReqHeight = 1; ++ *PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; ++ *PTERequestSize = 64; ++ FractionOfPTEReturnDrop = 0; ++ } else if (MacroTileSizeBytes == 4096) { ++ PixelPTEReqHeightPTEs = 1; ++ *PixelPTEReqHeight = MacroTileHeight; ++ *PixelPTEReqWidth = 8 * *MacroTileWidth; ++ *PTERequestSize = 64; ++ if (ScanDirection == dm_horz) ++ FractionOfPTEReturnDrop = 0; ++ else ++ FractionOfPTEReturnDrop = 7 / 8; ++ } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { ++ PixelPTEReqHeightPTEs = 16; ++ *PixelPTEReqHeight = 16 * BlockHeight256Bytes; ++ *PixelPTEReqWidth = 16 * BlockWidth256Bytes; ++ *PTERequestSize = 128; ++ FractionOfPTEReturnDrop = 0; ++ } else { ++ PixelPTEReqHeightPTEs = 1; ++ *PixelPTEReqHeight = MacroTileHeight; ++ *PixelPTEReqWidth = 8 * *MacroTileWidth; ++ *PTERequestSize = 64; ++ FractionOfPTEReturnDrop = 0; ++ } ++ ++ if (SurfaceTiling == dm_sw_linear) { ++ *dpte_row_height = dml_min(128, ++ 1 << (unsigned int) dml_floor( ++ dml_log2( ++ (double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), ++ 1)); ++ *dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; ++ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; ++ } else if (ScanDirection == dm_horz) { ++ *dpte_row_height = *PixelPTEReqHeight; ++ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; ++ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; ++ } else { ++ *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); ++ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; ++ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; ++ } ++ if 
(*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) ++ <= 64 * PTEBufferSizeInRequests) { ++ *PTEBufferSizeNotExceeded = true; ++ } else { ++ *PTEBufferSizeNotExceeded = false; ++ } ++ } else { ++ *PixelPTEBytesPerRow = 0; ++ *PTEBufferSizeNotExceeded = true; ++ } ++ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame); ++ ++ if (HostVMEnable == true) { ++ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)); ++ } ++ ++ if (HostVMEnable == true) { ++ *vm_group_bytes = 512; ++ *dpte_group_bytes = 512; ++ } else if (GPUVMEnable == true) { ++ *vm_group_bytes = 2048; ++ if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) { ++ *dpte_group_bytes = 512; ++ } else { ++ *dpte_group_bytes = 2048; ++ } ++ } else { ++ *vm_group_bytes = 0; ++ *dpte_group_bytes = 0; ++ } ++ ++ return PDEAndMetaPTEBytesFrame; ++} ++ ++static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( ++ struct display_mode_lib *mode_lib) ++{ ++ struct vba_vars_st *locals = &mode_lib->vba; ++ unsigned int j, k; ++ ++ mode_lib->vba.WritebackDISPCLK = 0.0; ++ mode_lib->vba.DISPCLKWithRamping = 0; ++ mode_lib->vba.DISPCLKWithoutRamping = 0; ++ mode_lib->vba.GlobalDPPCLK = 0.0; ++ ++ // DISPCLK and DPPCLK Calculation ++ // ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.WritebackEnable[k]) { ++ mode_lib->vba.WritebackDISPCLK = ++ dml_max( ++ mode_lib->vba.WritebackDISPCLK, ++ CalculateWriteBackDISPCLK( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k], ++ 
mode_lib->vba.HTotal[k], ++ mode_lib->vba.WritebackChromaLineBufferWidth)); ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.HRatio[k] > 1) { ++ locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / dml_ceil( ++ mode_lib->vba.htaps[k] ++ / 6.0, ++ 1)); ++ } else { ++ locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma = ++ mode_lib->vba.PixelClock[k] ++ * dml_max( ++ mode_lib->vba.vtaps[k] / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k]), ++ dml_max( ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / locals->PSCL_THROUGHPUT_LUMA[k], ++ 1.0)); ++ ++ if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) ++ && mode_lib->vba.DPPCLKUsingSingleDPPLuma ++ < 2 * mode_lib->vba.PixelClock[k]) { ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; ++ } ++ ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { ++ locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0; ++ locals->DPPCLKUsingSingleDPP[k] = ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma; ++ } else { ++ if (mode_lib->vba.HRatio[k] > 1) { ++ locals->PSCL_THROUGHPUT_CHROMA[k] = ++ dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / 2 ++ / dml_ceil( ++ mode_lib->vba.HTAPsChroma[k] ++ / 6.0, ++ 1.0)); ++ } else { ++ locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ mode_lib->vba.DPPCLKUsingSingleDPPChroma = ++ mode_lib->vba.PixelClock[k] ++ * dml_max( ++ mode_lib->vba.VTAPsChroma[k] ++ / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k] ++ / 2), ++ dml_max( ++ mode_lib->vba.HRatio[k] ++ * 
mode_lib->vba.VRatio[k] ++ / 4 ++ / locals->PSCL_THROUGHPUT_CHROMA[k], ++ 1.0)); ++ ++ if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) ++ && mode_lib->vba.DPPCLKUsingSingleDPPChroma ++ < 2 * mode_lib->vba.PixelClock[k]) { ++ mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 ++ * mode_lib->vba.PixelClock[k]; ++ } ++ ++ locals->DPPCLKUsingSingleDPP[k] = dml_max( ++ mode_lib->vba.DPPCLKUsingSingleDPPLuma, ++ mode_lib->vba.DPPCLKUsingSingleDPPChroma); ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.BlendingAndTiming[k] != k) ++ continue; ++ if (mode_lib->vba.ODMCombineEnabled[k]) { ++ mode_lib->vba.DISPCLKWithRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.PixelClock[k] / 2 ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100) ++ * (1 ++ + mode_lib->vba.DISPCLKRampingMargin ++ / 100)); ++ mode_lib->vba.DISPCLKWithoutRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.PixelClock[k] / 2 ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100)); ++ } else if (!mode_lib->vba.ODMCombineEnabled[k]) { ++ mode_lib->vba.DISPCLKWithRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.PixelClock[k] ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100) ++ * (1 ++ + mode_lib->vba.DISPCLKRampingMargin ++ / 100)); ++ mode_lib->vba.DISPCLKWithoutRamping = ++ dml_max( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.PixelClock[k] ++ * (1 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100)); ++ } ++ } ++ ++ mode_lib->vba.DISPCLKWithRamping = dml_max( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.WritebackDISPCLK); ++ mode_lib->vba.DISPCLKWithoutRamping = dml_max( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.WritebackDISPCLK); ++ ++ ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); ++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = 
RoundToDFSGranularityUp( ++ mode_lib->vba.DISPCLKWithRamping, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( ++ mode_lib->vba.DISPCLKWithoutRamping, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( ++ mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity ++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { ++ mode_lib->vba.DISPCLK_calculated = ++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; ++ } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity ++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { ++ mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; ++ } else { ++ mode_lib->vba.DISPCLK_calculated = ++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; ++ } ++ DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k] ++ / mode_lib->vba.DPPPerPlane[k] ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); ++ mode_lib->vba.GlobalDPPCLK = dml_max( ++ mode_lib->vba.GlobalDPPCLK, ++ mode_lib->vba.DPPCLK_calculated[k]); ++ } ++ mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( ++ mode_lib->vba.GlobalDPPCLK, ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 ++ * dml_ceil( ++ mode_lib->vba.DPPCLK_calculated[k] * 255 ++ / mode_lib->vba.GlobalDPPCLK, ++ 1); ++ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); ++ } ++ ++ // Urgent and B P-State/DRAM Clock Change Watermark ++ DTRACE(" dcfclk_mhz = 
%f", mode_lib->vba.DCFCLK); ++ DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); ++ DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ bool MainPlaneDoesODMCombine = false; ++ ++ if (mode_lib->vba.SourceScan[k] == dm_horz) ++ locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; ++ else ++ locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; ++ ++ if (mode_lib->vba.ODMCombineEnabled[k] == true) ++ MainPlaneDoesODMCombine = true; ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) ++ if (mode_lib->vba.BlendingAndTiming[k] == j ++ && mode_lib->vba.ODMCombineEnabled[j] == true) ++ MainPlaneDoesODMCombine = true; ++ ++ if (MainPlaneDoesODMCombine == true) ++ locals->SwathWidthY[k] = dml_min( ++ (double) locals->SwathWidthSingleDPPY[k], ++ dml_round( ++ mode_lib->vba.HActive[k] / 2.0 ++ * mode_lib->vba.HRatio[k])); ++ else ++ locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k] ++ / mode_lib->vba.DPPPerPlane[k]; ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { ++ locals->BytePerPixelDETY[k] = 8; ++ locals->BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { ++ locals->BytePerPixelDETY[k] = 4; ++ locals->BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { ++ locals->BytePerPixelDETY[k] = 2; ++ locals->BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { ++ locals->BytePerPixelDETY[k] = 1; ++ locals->BytePerPixelDETC[k] = 0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { ++ locals->BytePerPixelDETY[k] = 1; ++ locals->BytePerPixelDETC[k] = 2; ++ } else { // dm_420_10 ++ locals->BytePerPixelDETY[k] = 4.0 / 3.0; ++ locals->BytePerPixelDETC[k] 
= 8.0 / 3.0; ++ } ++ } ++ ++ mode_lib->vba.TotalDataReadBandwidth = 0.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k] ++ * dml_ceil(locals->BytePerPixelDETY[k], 1) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ * mode_lib->vba.VRatio[k]; ++ locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k] ++ / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) ++ * mode_lib->vba.VRatio[k] / 2; ++ DTRACE( ++ " read_bw[%i] = %fBps", ++ k, ++ locals->ReadBandwidthPlaneLuma[k] ++ + locals->ReadBandwidthPlaneChroma[k]); ++ mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k] ++ + locals->ReadBandwidthPlaneChroma[k]; ++ } ++ ++ // DCFCLK Deep Sleep ++ CalculateDCFCLKDeepSleep( ++ mode_lib, ++ mode_lib->vba.NumberOfActivePlanes, ++ locals->BytePerPixelDETY, ++ locals->BytePerPixelDETC, ++ mode_lib->vba.VRatio, ++ locals->SwathWidthY, ++ mode_lib->vba.DPPPerPlane, ++ mode_lib->vba.HRatio, ++ mode_lib->vba.PixelClock, ++ locals->PSCL_THROUGHPUT_LUMA, ++ locals->PSCL_THROUGHPUT_CHROMA, ++ locals->DPPCLK, ++ &mode_lib->vba.DCFCLKDeepSleep); ++ ++ // DSCCLK ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { ++ locals->DSCCLK_calculated[k] = 0.0; ++ } else { ++ if (mode_lib->vba.OutputFormat[k] == dm_420 ++ || mode_lib->vba.OutputFormat[k] == dm_n422) ++ mode_lib->vba.DSCFormatFactor = 2; ++ else ++ mode_lib->vba.DSCFormatFactor = 1; ++ if (mode_lib->vba.ODMCombineEnabled[k]) ++ locals->DSCCLK_calculated[k] = ++ mode_lib->vba.PixelClockBackEnd[k] / 6 ++ / mode_lib->vba.DSCFormatFactor ++ / (1 ++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100); ++ else ++ locals->DSCCLK_calculated[k] = ++ mode_lib->vba.PixelClockBackEnd[k] / 3 ++ / mode_lib->vba.DSCFormatFactor ++ / (1 ++ - 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100); ++ } ++ } ++ ++ // DSC Delay ++ // TODO ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ double bpp = mode_lib->vba.OutputBpp[k]; ++ unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; ++ ++ if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { ++ if (!mode_lib->vba.ODMCombineEnabled[k]) { ++ locals->DSCDelay[k] = ++ dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ bpp, ++ dml_ceil( ++ (double) mode_lib->vba.HActive[k] ++ / mode_lib->vba.NumberOfDSCSlices[k], ++ 1), ++ slices, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay( ++ mode_lib->vba.OutputFormat[k]); ++ } else { ++ locals->DSCDelay[k] = ++ 2 ++ * (dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ bpp, ++ dml_ceil( ++ (double) mode_lib->vba.HActive[k] ++ / mode_lib->vba.NumberOfDSCSlices[k], ++ 1), ++ slices / 2.0, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay( ++ mode_lib->vba.OutputFormat[k])); ++ } ++ locals->DSCDelay[k] = locals->DSCDelay[k] ++ * mode_lib->vba.PixelClock[k] ++ / mode_lib->vba.PixelClockBackEnd[k]; ++ } else { ++ locals->DSCDelay[k] = 0; ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes ++ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j ++ && mode_lib->vba.DSCEnabled[j]) ++ locals->DSCDelay[k] = locals->DSCDelay[j]; ++ ++ // Prefetch ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ unsigned int PDEAndMetaPTEBytesFrameY; ++ unsigned int PixelPTEBytesPerRowY; ++ unsigned int MetaRowByteY; ++ unsigned int MetaRowByteC; ++ unsigned int PDEAndMetaPTEBytesFrameC; ++ unsigned int PixelPTEBytesPerRowC; ++ bool PTEBufferSizeNotExceededY; ++ bool PTEBufferSizeNotExceededC; ++ ++ Calculate256BBlockSizes( ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(locals->BytePerPixelDETY[k], 1), ++ 
dml_ceil(locals->BytePerPixelDETC[k], 2), ++ &locals->BlockHeight256BytesY[k], ++ &locals->BlockHeight256BytesC[k], ++ &locals->BlockWidth256BytesY[k], ++ &locals->BlockWidth256BytesC[k]); ++ ++ locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.vtaps[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.ViewportYStartY[k], ++ &locals->VInitPreFillY[k], ++ &locals->MaxNumSwathY[k]); ++ ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { ++ PDEAndMetaPTEBytesFrameC = ++ CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ locals->BlockHeight256BytesC[k], ++ locals->BlockWidth256BytesC[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil( ++ locals->BytePerPixelDETC[k], ++ 2), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k] / 2, ++ mode_lib->vba.ViewportHeight[k] / 2, ++ locals->SwathWidthY[k] / 2, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels, ++ mode_lib->vba.VMMPageSize, ++ mode_lib->vba.PTEBufferSizeInRequestsChroma, ++ mode_lib->vba.PitchC[k], ++ mode_lib->vba.DCCMetaPitchC[k], ++ &locals->MacroTileWidthC[k], ++ &MetaRowByteC, ++ &PixelPTEBytesPerRowC, ++ &PTEBufferSizeNotExceededC, ++ &locals->dpte_row_width_chroma_ub[k], ++ &locals->dpte_row_height_chroma[k], ++ &locals->meta_req_width_chroma[k], ++ &locals->meta_req_height_chroma[k], ++ &locals->meta_row_width_chroma[k], ++ &locals->meta_row_height_chroma[k], ++ &locals->vm_group_bytes_chroma, ++ &locals->dpte_group_bytes_chroma, ++ &locals->PixelPTEReqWidthC[k], ++ &locals->PixelPTEReqHeightC[k], ++ 
&locals->PTERequestSizeC[k], ++ &locals->dpde0_bytes_per_frame_ub_c[k], ++ &locals->meta_pte_bytes_per_frame_ub_c[k]); ++ ++ locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k] / 2, ++ mode_lib->vba.VTAPsChroma[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ mode_lib->vba.SwathHeightC[k], ++ mode_lib->vba.ViewportYStartC[k], ++ &locals->VInitPreFillC[k], ++ &locals->MaxNumSwathC[k]); ++ } else { ++ PixelPTEBytesPerRowC = 0; ++ PDEAndMetaPTEBytesFrameC = 0; ++ MetaRowByteC = 0; ++ locals->MaxNumSwathC[k] = 0; ++ locals->PrefetchSourceLinesC[k] = 0; ++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; ++ } ++ ++ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ locals->BlockHeight256BytesY[k], ++ locals->BlockWidth256BytesY[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(locals->BytePerPixelDETY[k], 1), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k], ++ mode_lib->vba.ViewportHeight[k], ++ locals->SwathWidthY[k], ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels, ++ mode_lib->vba.VMMPageSize, ++ locals->PTEBufferSizeInRequestsForLuma, ++ mode_lib->vba.PitchY[k], ++ mode_lib->vba.DCCMetaPitchY[k], ++ &locals->MacroTileWidthY[k], ++ &MetaRowByteY, ++ &PixelPTEBytesPerRowY, ++ &PTEBufferSizeNotExceededY, ++ &locals->dpte_row_width_luma_ub[k], ++ &locals->dpte_row_height[k], ++ &locals->meta_req_width[k], ++ &locals->meta_req_height[k], ++ &locals->meta_row_width[k], ++ &locals->meta_row_height[k], ++ &locals->vm_group_bytes[k], ++ &locals->dpte_group_bytes[k], ++ &locals->PixelPTEReqWidthY[k], ++ &locals->PixelPTEReqHeightY[k], ++ &locals->PTERequestSizeY[k], ++ 
&locals->dpde0_bytes_per_frame_ub_l[k], ++ &locals->meta_pte_bytes_per_frame_ub_l[k]); ++ ++ locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; ++ locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY ++ + PDEAndMetaPTEBytesFrameC; ++ locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; ++ ++ CalculateActiveRowBandwidth( ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ MetaRowByteY, ++ MetaRowByteC, ++ locals->meta_row_height[k], ++ locals->meta_row_height_chroma[k], ++ PixelPTEBytesPerRowY, ++ PixelPTEBytesPerRowC, ++ locals->dpte_row_height[k], ++ locals->dpte_row_height_chroma[k], ++ &locals->meta_row_bw[k], ++ &locals->dpte_row_bw[k]); ++ } ++ ++ mode_lib->vba.TotalDCCActiveDPP = 0; ++ mode_lib->vba.TotalActiveDPP = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP ++ + mode_lib->vba.DPPPerPlane[k]; ++ if (mode_lib->vba.DCCEnable[k]) ++ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP ++ + mode_lib->vba.DPPPerPlane[k]; ++ } ++ ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); ++ ++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = ++ (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK ++ + mode_lib->vba.UrgentOutOfOrderReturnPerChannel ++ * mode_lib->vba.NumberOfChannels ++ / mode_lib->vba.ReturnBW; ++ ++ mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency( ++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency, ++ mode_lib->vba.TotalActiveDPP, ++ mode_lib->vba.PixelChunkSizeInKByte, ++ mode_lib->vba.TotalDCCActiveDPP, ++ mode_lib->vba.MetaChunkSize, ++ 
mode_lib->vba.ReturnBW, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.NumberOfActivePlanes, ++ mode_lib->vba.DPPPerPlane, ++ locals->dpte_group_bytes, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels); ++ ++ ++ mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = ++ mode_lib->vba.WritebackLatency ++ + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k]) ++ / mode_lib->vba.DISPCLK; ++ } else ++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { ++ if (mode_lib->vba.BlendingAndTiming[j] == k ++ && mode_lib->vba.WritebackEnable[j] == true) { ++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = ++ dml_max( ++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k], ++ mode_lib->vba.WritebackLatency ++ + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[j], ++ mode_lib->vba.WritebackHRatio[j], ++ mode_lib->vba.WritebackVRatio[j], ++ mode_lib->vba.WritebackLumaHTaps[j], ++ mode_lib->vba.WritebackLumaVTaps[j], ++ mode_lib->vba.WritebackChromaHTaps[j], ++ mode_lib->vba.WritebackChromaVTaps[j], ++ mode_lib->vba.WritebackDestinationWidth[j]) ++ / mode_lib->vba.DISPCLK); ++ } ++ } ++ } ++ } ++ ++ for (k = 0; k < 
mode_lib->vba.NumberOfActivePlanes; ++k) ++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) ++ if (mode_lib->vba.BlendingAndTiming[k] == j) ++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = ++ locals->WritebackDelay[mode_lib->vba.VoltageLevel][j]; ++ ++ mode_lib->vba.VStartupLines = 13; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1)); ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) ++ locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]); ++ ++ // We don't really care to iterate between the various prefetch modes ++ //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode); ++ mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly); ++ ++ do { ++ double MaxTotalRDBandwidth = 0; ++ double MaxTotalRDBandwidthNoUrgentBurst = 0; ++ bool DestinationLineTimesForPrefetchLessThan2 = false; ++ bool VRatioPrefetchMoreThan4 = false; ++ double TWait = CalculateTWait( ++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.SREnterPlusExitTime); ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ Pipe myPipe; ++ HostVM myHostVM; ++ ++ if (mode_lib->vba.XFCEnabled[k] == true) { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = ++ CalculateRemoteSurfaceFlipDelay( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ locals->SwathWidthY[k], ++ dml_ceil( ++ locals->BytePerPixelDETY[k], ++ 1), ++ 
mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.XFCTSlvVupdateOffset, ++ mode_lib->vba.XFCTSlvVupdateWidth, ++ mode_lib->vba.XFCTSlvVreadyOffset, ++ mode_lib->vba.XFCXBUFLatencyTolerance, ++ mode_lib->vba.XFCFillBWOverhead, ++ mode_lib->vba.XFCSlvChunkSize, ++ mode_lib->vba.XFCBusTransportTime, ++ mode_lib->vba.TCalc, ++ TWait, ++ &mode_lib->vba.SrcActiveDrainRate, ++ &mode_lib->vba.TInitXFill, ++ &mode_lib->vba.TslvChk); ++ } else { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; ++ } ++ ++ myPipe.DPPCLK = locals->DPPCLK[k]; ++ myPipe.DISPCLK = mode_lib->vba.DISPCLK; ++ myPipe.PixelClock = mode_lib->vba.PixelClock[k]; ++ myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep; ++ myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k]; ++ myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; ++ myPipe.SourceScan = mode_lib->vba.SourceScan[k]; ++ myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; ++ myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; ++ myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; ++ myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; ++ myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; ++ myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; ++ myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; ++ myPipe.HTotal = mode_lib->vba.HTotal[k]; ++ ++ ++ myHostVM.Enable = mode_lib->vba.HostVMEnable; ++ myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; ++ myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; ++ ++ mode_lib->vba.ErrorResult[k] = ++ CalculatePrefetchSchedule( ++ mode_lib, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ &myPipe, ++ locals->DSCDelay[k], ++ mode_lib->vba.DPPCLKDelaySubtotal, ++ mode_lib->vba.DPPCLKDelaySCL, ++ mode_lib->vba.DPPCLKDelaySCLLBOnly, ++ 
mode_lib->vba.DPPCLKDelayCNVCFormater, ++ mode_lib->vba.DPPCLKDelayCNVCCursor, ++ mode_lib->vba.DISPCLKDelaySubtotal, ++ (unsigned int) (locals->SwathWidthY[k] ++ / mode_lib->vba.HRatio[k]), ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.MaxInterDCNTileRepeaters, ++ dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]), ++ locals->MaxVStartupLines[k], ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ &myHostVM, ++ mode_lib->vba.DynamicMetadataEnable[k], ++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], ++ mode_lib->vba.DynamicMetadataTransmittedBytes[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.UrgentExtraLatency, ++ mode_lib->vba.TCalc, ++ locals->PDEAndMetaPTEBytesFrame[k], ++ locals->MetaRowByte[k], ++ locals->PixelPTEBytesPerRow[k], ++ locals->PrefetchSourceLinesY[k], ++ locals->SwathWidthY[k], ++ locals->BytePerPixelDETY[k], ++ locals->VInitPreFillY[k], ++ locals->MaxNumSwathY[k], ++ locals->PrefetchSourceLinesC[k], ++ locals->BytePerPixelDETC[k], ++ locals->VInitPreFillC[k], ++ locals->MaxNumSwathC[k], ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.SwathHeightC[k], ++ TWait, ++ mode_lib->vba.XFCEnabled[k], ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay, ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ &locals->DSTXAfterScaler[k], ++ &locals->DSTYAfterScaler[k], ++ &locals->DestinationLinesForPrefetch[k], ++ &locals->PrefetchBandwidth[k], ++ &locals->DestinationLinesToRequestVMInVBlank[k], ++ &locals->DestinationLinesToRequestRowInVBlank[k], ++ &locals->VRatioPrefetchY[k], ++ &locals->VRatioPrefetchC[k], ++ &locals->RequiredPrefetchPixDataBWLuma[k], ++ &locals->RequiredPrefetchPixDataBWChroma[k], ++ &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, ++ &locals->Tno_bw[k], ++ &locals->prefetch_vmrow_bw[k], ++ &locals->swath_width_luma_ub[k], ++ &locals->swath_width_chroma_ub[k], ++ &mode_lib->vba.VUpdateOffsetPix[k], ++ &mode_lib->vba.VUpdateWidthPix[k], 
++ &mode_lib->vba.VReadyOffsetPix[k]); ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ locals->VStartup[k] = dml_min( ++ mode_lib->vba.VStartupLines, ++ locals->MaxVStartupLines[k]); ++ if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata ++ != 0) { ++ locals->VStartup[k] = ++ locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; ++ } ++ } else { ++ locals->VStartup[k] = ++ dml_min( ++ mode_lib->vba.VStartupLines, ++ locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); ++ } ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ unsigned int m; ++ ++ locals->cursor_bw[k] = 0; ++ locals->cursor_bw_pre[k] = 0; ++ for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { ++ locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; ++ locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k]; ++ } ++ ++ CalculateUrgentBurstFactor( ++ mode_lib->vba.DETBufferSizeInKByte, ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.SwathHeightC[k], ++ locals->SwathWidthY[k], ++ mode_lib->vba.HTotal[k] / ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.CursorBufferSize, ++ mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], ++ dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), ++ mode_lib->vba.VRatio[k], ++ locals->VRatioPrefetchY[k], ++ locals->VRatioPrefetchC[k], ++ locals->BytePerPixelDETY[k], ++ locals->BytePerPixelDETC[k], ++ &locals->UrgentBurstFactorCursor[k], ++ &locals->UrgentBurstFactorCursorPre[k], ++ &locals->UrgentBurstFactorLuma[k], ++ &locals->UrgentBurstFactorLumaPre[k], ++ &locals->UrgentBurstFactorChroma[k], ++ &locals->UrgentBurstFactorChromaPre[k], ++ &locals->NotEnoughUrgentLatencyHiding, ++ 
&locals->NotEnoughUrgentLatencyHidingPre); ++ ++ if (mode_lib->vba.UseUrgentBurstBandwidth == false) { ++ locals->UrgentBurstFactorLuma[k] = 1; ++ locals->UrgentBurstFactorChroma[k] = 1; ++ locals->UrgentBurstFactorCursor[k] = 1; ++ locals->UrgentBurstFactorLumaPre[k] = 1; ++ locals->UrgentBurstFactorChromaPre[k] = 1; ++ locals->UrgentBurstFactorCursorPre[k] = 1; ++ } ++ ++ MaxTotalRDBandwidth = MaxTotalRDBandwidth + ++ dml_max3(locals->prefetch_vmrow_bw[k], ++ locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] ++ + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] ++ * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k], ++ locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k] ++ * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); ++ ++ MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst + ++ dml_max3(locals->prefetch_vmrow_bw[k], ++ locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k] ++ + locals->meta_row_bw[k] + locals->dpte_row_bw[k], ++ locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); ++ ++ if (locals->DestinationLinesForPrefetch[k] < 2) ++ DestinationLineTimesForPrefetchLessThan2 = true; ++ if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4) ++ VRatioPrefetchMoreThan4 = true; ++ } ++ mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW; ++ ++ if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 ++ && !DestinationLineTimesForPrefetchLessThan2) ++ mode_lib->vba.PrefetchModeSupported = true; ++ else { ++ mode_lib->vba.PrefetchModeSupported 
= false; ++ dml_print( ++ "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); ++ } ++ ++ if (mode_lib->vba.PrefetchModeSupported == true) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = ++ mode_lib->vba.BandwidthAvailableForImmediateFlip ++ - dml_max( ++ locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] ++ + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] ++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], ++ locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + ++ locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] + ++ locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); ++ } ++ ++ mode_lib->vba.TotImmediateFlipBytes = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k]; ++ } ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ CalculateFlipSchedule( ++ mode_lib, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ mode_lib->vba.UrgentExtraLatency, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ locals->PDEAndMetaPTEBytesFrame[k], ++ locals->MetaRowByte[k], ++ locals->PixelPTEBytesPerRow[k], ++ mode_lib->vba.BandwidthAvailableForImmediateFlip, ++ mode_lib->vba.TotImmediateFlipBytes, ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.HTotal[k] / 
mode_lib->vba.PixelClock[k], ++ mode_lib->vba.VRatio[k], ++ locals->Tno_bw[k], ++ mode_lib->vba.DCCEnable[k], ++ locals->dpte_row_height[k], ++ locals->meta_row_height[k], ++ locals->dpte_row_height_chroma[k], ++ locals->meta_row_height_chroma[k], ++ &locals->DestinationLinesToRequestVMInImmediateFlip[k], ++ &locals->DestinationLinesToRequestRowInImmediateFlip[k], ++ &locals->final_flip_bw[k], ++ &locals->ImmediateFlipSupportedForPipe[k]); ++ } ++ mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; ++ mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ mode_lib->vba.total_dcn_read_bw_with_flip = ++ mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( ++ locals->prefetch_vmrow_bw[k], ++ locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] ++ + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], ++ locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] ++ + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] ++ + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); ++ mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = ++ mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst + ++ dml_max3(locals->prefetch_vmrow_bw[k], ++ locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k], ++ locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); ++ ++ } ++ mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW; ++ ++ mode_lib->vba.ImmediateFlipSupported = true; ++ if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { ++ 
mode_lib->vba.ImmediateFlipSupported = false; ++ } ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (locals->ImmediateFlipSupportedForPipe[k] == false) { ++ mode_lib->vba.ImmediateFlipSupported = false; ++ } ++ } ++ } else { ++ mode_lib->vba.ImmediateFlipSupported = false; ++ } ++ ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.ErrorResult[k]) { ++ mode_lib->vba.PrefetchModeSupported = false; ++ dml_print( ++ "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); ++ } ++ } ++ ++ mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; ++ } while (!((mode_lib->vba.PrefetchModeSupported ++ && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable) ++ || mode_lib->vba.ImmediateFlipSupported)) ++ || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); ++ ++ //Watermarks and NB P-State/DRAM Clock Change Support ++ { ++ enum clock_change_support DRAMClockChangeSupport; // dummy ++ CalculateWatermarksAndDRAMSpeedChangeSupport( ++ mode_lib, ++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], ++ mode_lib->vba.NumberOfActivePlanes, ++ mode_lib->vba.MaxLineBufferLines, ++ mode_lib->vba.LineBufferSize, ++ mode_lib->vba.DPPOutputBufferPixels, ++ mode_lib->vba.DETBufferSizeInKByte, ++ mode_lib->vba.WritebackInterfaceLumaBufferSize, ++ mode_lib->vba.WritebackInterfaceChromaBufferSize, ++ mode_lib->vba.DCFCLK, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, ++ mode_lib->vba.ReturnBW, ++ mode_lib->vba.GPUVMEnable, ++ locals->dpte_group_bytes, ++ mode_lib->vba.MetaChunkSize, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.UrgentExtraLatency, ++ mode_lib->vba.WritebackLatency, ++ mode_lib->vba.WritebackChunkSize, ++ mode_lib->vba.SOCCLK, ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.SRExitTime, ++ mode_lib->vba.SREnterPlusExitTime, ++ mode_lib->vba.DCFCLKDeepSleep, ++ 
mode_lib->vba.DPPPerPlane, ++ mode_lib->vba.DCCEnable, ++ locals->DPPCLK, ++ locals->SwathWidthSingleDPPY, ++ mode_lib->vba.SwathHeightY, ++ locals->ReadBandwidthPlaneLuma, ++ mode_lib->vba.SwathHeightC, ++ locals->ReadBandwidthPlaneChroma, ++ mode_lib->vba.LBBitPerPixel, ++ locals->SwathWidthY, ++ mode_lib->vba.HRatio, ++ mode_lib->vba.vtaps, ++ mode_lib->vba.VTAPsChroma, ++ mode_lib->vba.VRatio, ++ mode_lib->vba.HTotal, ++ mode_lib->vba.PixelClock, ++ mode_lib->vba.BlendingAndTiming, ++ locals->BytePerPixelDETY, ++ locals->BytePerPixelDETC, ++ mode_lib->vba.WritebackEnable, ++ mode_lib->vba.WritebackPixelFormat, ++ mode_lib->vba.WritebackDestinationWidth, ++ mode_lib->vba.WritebackDestinationHeight, ++ mode_lib->vba.WritebackSourceHeight, ++ &DRAMClockChangeSupport, ++ &mode_lib->vba.UrgentWatermark, ++ &mode_lib->vba.WritebackUrgentWatermark, ++ &mode_lib->vba.DRAMClockChangeWatermark, ++ &mode_lib->vba.WritebackDRAMClockChangeWatermark, ++ &mode_lib->vba.StutterExitWatermark, ++ &mode_lib->vba.StutterEnterPlusExitWatermark, ++ &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); ++ } ++ ++ ++ //Display Pipeline Delivery Time in Prefetch, Groups ++ CalculatePixelDeliveryTimes( ++ mode_lib->vba.NumberOfActivePlanes, ++ mode_lib->vba.VRatio, ++ locals->VRatioPrefetchY, ++ locals->VRatioPrefetchC, ++ locals->swath_width_luma_ub, ++ locals->swath_width_chroma_ub, ++ mode_lib->vba.DPPPerPlane, ++ mode_lib->vba.HRatio, ++ mode_lib->vba.PixelClock, ++ locals->PSCL_THROUGHPUT_LUMA, ++ locals->PSCL_THROUGHPUT_CHROMA, ++ locals->DPPCLK, ++ locals->BytePerPixelDETC, ++ mode_lib->vba.SourceScan, ++ locals->BlockWidth256BytesY, ++ locals->BlockHeight256BytesY, ++ locals->BlockWidth256BytesC, ++ locals->BlockHeight256BytesC, ++ locals->DisplayPipeLineDeliveryTimeLuma, ++ locals->DisplayPipeLineDeliveryTimeChroma, ++ locals->DisplayPipeLineDeliveryTimeLumaPrefetch, ++ locals->DisplayPipeLineDeliveryTimeChromaPrefetch, ++ locals->DisplayPipeRequestDeliveryTimeLuma, ++ 
locals->DisplayPipeRequestDeliveryTimeChroma, ++ locals->DisplayPipeRequestDeliveryTimeLumaPrefetch, ++ locals->DisplayPipeRequestDeliveryTimeChromaPrefetch); ++ ++ CalculateMetaAndPTETimes( ++ mode_lib->vba.NumberOfActivePlanes, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.MetaChunkSize, ++ mode_lib->vba.MinMetaChunkSizeBytes, ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.HTotal, ++ mode_lib->vba.VRatio, ++ locals->VRatioPrefetchY, ++ locals->VRatioPrefetchC, ++ locals->DestinationLinesToRequestRowInVBlank, ++ locals->DestinationLinesToRequestRowInImmediateFlip, ++ locals->DestinationLinesToRequestVMInVBlank, ++ locals->DestinationLinesToRequestVMInImmediateFlip, ++ mode_lib->vba.DCCEnable, ++ mode_lib->vba.PixelClock, ++ locals->BytePerPixelDETY, ++ locals->BytePerPixelDETC, ++ mode_lib->vba.SourceScan, ++ locals->dpte_row_height, ++ locals->dpte_row_height_chroma, ++ locals->meta_row_width, ++ locals->meta_row_height, ++ locals->meta_req_width, ++ locals->meta_req_height, ++ locals->dpte_group_bytes, ++ locals->PTERequestSizeY, ++ locals->PTERequestSizeC, ++ locals->PixelPTEReqWidthY, ++ locals->PixelPTEReqHeightY, ++ locals->PixelPTEReqWidthC, ++ locals->PixelPTEReqHeightC, ++ locals->dpte_row_width_luma_ub, ++ locals->dpte_row_width_chroma_ub, ++ locals->vm_group_bytes, ++ locals->dpde0_bytes_per_frame_ub_l, ++ locals->dpde0_bytes_per_frame_ub_c, ++ locals->meta_pte_bytes_per_frame_ub_l, ++ locals->meta_pte_bytes_per_frame_ub_c, ++ locals->DST_Y_PER_PTE_ROW_NOM_L, ++ locals->DST_Y_PER_PTE_ROW_NOM_C, ++ locals->DST_Y_PER_META_ROW_NOM_L, ++ locals->TimePerMetaChunkNominal, ++ locals->TimePerMetaChunkVBlank, ++ locals->TimePerMetaChunkFlip, ++ locals->time_per_pte_group_nom_luma, ++ locals->time_per_pte_group_vblank_luma, ++ locals->time_per_pte_group_flip_luma, ++ locals->time_per_pte_group_nom_chroma, ++ locals->time_per_pte_group_vblank_chroma, ++ locals->time_per_pte_group_flip_chroma, ++ locals->TimePerVMGroupVBlank, ++ 
locals->TimePerVMGroupFlip, ++ locals->TimePerVMRequestVBlank, ++ locals->TimePerVMRequestFlip); ++ ++ ++ // Min TTUVBlank ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { ++ locals->AllowDRAMClockChangeDuringVBlank[k] = true; ++ locals->AllowDRAMSelfRefreshDuringVBlank[k] = true; ++ locals->MinTTUVBlank[k] = dml_max( ++ mode_lib->vba.DRAMClockChangeWatermark, ++ dml_max( ++ mode_lib->vba.StutterEnterPlusExitWatermark, ++ mode_lib->vba.UrgentWatermark)); ++ } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { ++ locals->AllowDRAMClockChangeDuringVBlank[k] = false; ++ locals->AllowDRAMSelfRefreshDuringVBlank[k] = true; ++ locals->MinTTUVBlank[k] = dml_max( ++ mode_lib->vba.StutterEnterPlusExitWatermark, ++ mode_lib->vba.UrgentWatermark); ++ } else { ++ locals->AllowDRAMClockChangeDuringVBlank[k] = false; ++ locals->AllowDRAMSelfRefreshDuringVBlank[k] = false; ++ locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; ++ } ++ if (!mode_lib->vba.DynamicMetadataEnable[k]) ++ locals->MinTTUVBlank[k] = mode_lib->vba.TCalc ++ + locals->MinTTUVBlank[k]; ++ } ++ ++ // DCC Configuration ++ mode_lib->vba.ActiveDPPs = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration( ++ mode_lib->vba.DCCEnable[k], ++ false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, ++ mode_lib->vba.ViewportWidth[k], ++ mode_lib->vba.ViewportHeight[k], ++ mode_lib->vba.DETBufferSizeInKByte * 1024, ++ locals->BlockHeight256BytesY[k], ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.SurfaceTiling[k], ++ locals->BytePerPixelDETY[k], ++ mode_lib->vba.SourceScan[k], ++ &locals->DCCYMaxUncompressedBlock[k], ++ &locals->DCCYMaxCompressedBlock[k], ++ &locals->DCCYIndependent64ByteBlock[k]); ++ } ++ ++ //XFC Parameters: ++ for (k = 0; k < 
mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.XFCEnabled[k] == true) { ++ double TWait; ++ ++ locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; ++ locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; ++ locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; ++ TWait = CalculateTWait( ++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.SREnterPlusExitTime); ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ locals->SwathWidthY[k], ++ dml_ceil(locals->BytePerPixelDETY[k], 1), ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.XFCTSlvVupdateOffset, ++ mode_lib->vba.XFCTSlvVupdateWidth, ++ mode_lib->vba.XFCTSlvVreadyOffset, ++ mode_lib->vba.XFCXBUFLatencyTolerance, ++ mode_lib->vba.XFCFillBWOverhead, ++ mode_lib->vba.XFCSlvChunkSize, ++ mode_lib->vba.XFCBusTransportTime, ++ mode_lib->vba.TCalc, ++ TWait, ++ &mode_lib->vba.SrcActiveDrainRate, ++ &mode_lib->vba.TInitXFill, ++ &mode_lib->vba.TslvChk); ++ locals->XFCRemoteSurfaceFlipLatency[k] = ++ dml_floor( ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1); ++ locals->XFCTransferDelay[k] = ++ dml_ceil( ++ mode_lib->vba.XFCBusTransportTime ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1); ++ locals->XFCPrechargeDelay[k] = ++ dml_ceil( ++ (mode_lib->vba.XFCBusTransportTime ++ + mode_lib->vba.TInitXFill ++ + mode_lib->vba.TslvChk) ++ / (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]), ++ 1); ++ mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance ++ * mode_lib->vba.SrcActiveDrainRate; ++ mode_lib->vba.FinalFillMargin = ++ (locals->DestinationLinesToRequestVMInVBlank[k] ++ + locals->DestinationLinesToRequestRowInVBlank[k]) ++ * 
mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k] ++ * mode_lib->vba.SrcActiveDrainRate ++ + mode_lib->vba.XFCFillConstant; ++ mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay ++ * mode_lib->vba.SrcActiveDrainRate ++ + mode_lib->vba.FinalFillMargin; ++ mode_lib->vba.RemainingFillLevel = dml_max( ++ 0.0, ++ mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); ++ mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel ++ / (mode_lib->vba.SrcActiveDrainRate ++ * mode_lib->vba.XFCFillBWOverhead / 100); ++ locals->XFCPrefetchMargin[k] = ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay ++ + mode_lib->vba.TFinalxFill ++ + (locals->DestinationLinesToRequestVMInVBlank[k] ++ + locals->DestinationLinesToRequestRowInVBlank[k]) ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ } else { ++ locals->XFCSlaveVUpdateOffset[k] = 0; ++ locals->XFCSlaveVupdateWidth[k] = 0; ++ locals->XFCSlaveVReadyOffset[k] = 0; ++ locals->XFCRemoteSurfaceFlipLatency[k] = 0; ++ locals->XFCPrechargeDelay[k] = 0; ++ locals->XFCTransferDelay[k] = 0; ++ locals->XFCPrefetchMargin[k] = 0; ++ } ++ } ++ ++ // Stutter Efficiency ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ CalculateDETBufferSize( ++ mode_lib->vba.DETBufferSizeInKByte, ++ mode_lib->vba.SwathHeightY[k], ++ mode_lib->vba.SwathHeightC[k], ++ &locals->DETBufferSizeY[k], ++ &locals->DETBufferSizeC[k]); ++ ++ locals->LinesInDETY[k] = locals->DETBufferSizeY[k] ++ / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k]; ++ locals->LinesInDETYRoundedDownToSwath[k] = dml_floor( ++ locals->LinesInDETY[k], ++ mode_lib->vba.SwathHeightY[k]); ++ locals->FullDETBufferingTimeY[k] = ++ locals->LinesInDETYRoundedDownToSwath[k] ++ * (mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) ++ / mode_lib->vba.VRatio[k]; ++ } ++ ++ mode_lib->vba.StutterPeriod = 999999.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (locals->FullDETBufferingTimeY[k] < 
mode_lib->vba.StutterPeriod) { ++ mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k]; ++ mode_lib->vba.FrameTimeForMinFullDETBufferingTime = ++ (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1); ++ locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k]; ++ locals->LinesToFinishSwathTransferStutterCriticalPlane = ++ mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]); ++ } ++ } ++ ++ mode_lib->vba.AverageReadBandwidth = 0.0; ++ mode_lib->vba.TotalRowReadBandwidth = 0.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ unsigned int DCCRateLimit; ++ ++ if (mode_lib->vba.DCCEnable[k]) { ++ if (locals->DCCYMaxCompressedBlock[k] == 256) ++ DCCRateLimit = 4; ++ else ++ DCCRateLimit = 2; ++ ++ mode_lib->vba.AverageReadBandwidth = ++ mode_lib->vba.AverageReadBandwidth ++ + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) / ++ dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit); ++ } else { ++ mode_lib->vba.AverageReadBandwidth = ++ mode_lib->vba.AverageReadBandwidth ++ + locals->ReadBandwidthPlaneLuma[k] ++ + locals->ReadBandwidthPlaneChroma[k]; ++ } ++ mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth + ++ locals->meta_row_bw[k] + locals->dpte_row_bw[k]; ++ } ++ ++ mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth; ++ ++ mode_lib->vba.PartOfBurstThatFitsInROB = ++ dml_min( ++ mode_lib->vba.StutterPeriod ++ * mode_lib->vba.TotalDataReadBandwidth, ++ mode_lib->vba.ROBBufferSizeInKByte * 1024 ++ * mode_lib->vba.AverageDCCCompressionRate); ++ mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB ++ / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW ++ + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth ++ - 
mode_lib->vba.PartOfBurstThatFitsInROB) ++ / (mode_lib->vba.DCFCLK * 64) ++ + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW; ++ mode_lib->vba.StutterBurstTime = dml_max( ++ mode_lib->vba.StutterBurstTime, ++ (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane * ++ locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW) ++ ); ++ ++ mode_lib->vba.TotalActiveWriteback = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; ++ } ++ } ++ ++ if (mode_lib->vba.TotalActiveWriteback == 0) { ++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 ++ - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) ++ / mode_lib->vba.StutterPeriod) * 100; ++ } else { ++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; ++ } ++ ++ mode_lib->vba.SmallestVBlank = 999999; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { ++ mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] ++ - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]; ++ } else { ++ mode_lib->vba.VBlankTime = 0; ++ } ++ mode_lib->vba.SmallestVBlank = dml_min( ++ mode_lib->vba.SmallestVBlank, ++ mode_lib->vba.VBlankTime); ++ } ++ ++ mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 ++ * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime ++ - mode_lib->vba.SmallestVBlank) ++ + mode_lib->vba.SmallestVBlank) ++ / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; ++} ++ ++

/*
 * DisplayPipeConfiguration - pick the luma/chroma swath height of every plane.
 *
 * For each active plane this derives the bytes per pixel held in the DET, the
 * height/width of a 256-byte read block, and from those a maximum and a
 * minimum swath height.  The maximum is used when the rounded-up luma+chroma
 * swath fits in half of the DET buffer; otherwise the minimum is used.
 *
 * Writes mode_lib->vba.SwathHeightY[k] / SwathHeightC[k] and, through
 * CalculateDETBufferSize(), mode_lib->vba.DETBufferSizeY[k] /
 * DETBufferSizeC[k].
 */
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
{
	double BytePerPixDETY;
	double BytePerPixDETC;
	double Read256BytesBlockHeightY;
	double Read256BytesBlockHeightC;
	double Read256BytesBlockWidthY;
	double Read256BytesBlockWidthC;
	double MaximumSwathHeightY;
	double MaximumSwathHeightC;
	double MinimumSwathHeightY;
	double MinimumSwathHeightC;
	double SwathWidth;
	double SwathWidthGranularityY;
	double SwathWidthGranularityC;
	double RoundedUpMaxSwathSizeBytesY;
	double RoundedUpMaxSwathSizeBytesC;
	unsigned int j, k;

	for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
		/* Per-plane properties; none of them is modified inside the loop. */
		const bool Is444_64 = mode_lib->vba.SourcePixelFormat[k] == dm_444_64;
		const bool Is444_32 = mode_lib->vba.SourcePixelFormat[k] == dm_444_32;
		const bool Is444_16 = mode_lib->vba.SourcePixelFormat[k] == dm_444_16;
		const bool Is444_8 = mode_lib->vba.SourcePixelFormat[k] == dm_444_8;
		const bool Is420_8 = mode_lib->vba.SourcePixelFormat[k] == dm_420_8;
		const bool Is420_10 = mode_lib->vba.SourcePixelFormat[k] == dm_420_10;
		/* Formats handled without a chroma surface (block height/width C = 0). */
		const bool IsLumaOnly = Is444_64 || Is444_32 || Is444_16 || Is444_8;
		const bool IsLinear = mode_lib->vba.SurfaceTiling[k] == dm_sw_linear;
		const bool IsSSwizzle = mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_s
				|| mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_s_x
				|| mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_s
				|| mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_s_t
				|| mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_s_x
				|| mode_lib->vba.SurfaceTiling[k] == dm_sw_var_s
				|| mode_lib->vba.SurfaceTiling[k] == dm_sw_var_s_x;
		const bool IsHorz = mode_lib->vba.SourceScan[k] == dm_horz;
		bool MainPlaneDoesODMCombine = false;

		/* Bytes per pixel in the DET; every other format takes the last branch. */
		if (Is444_64) {
			BytePerPixDETY = 8;
			BytePerPixDETC = 0;
		} else if (Is444_32) {
			BytePerPixDETY = 4;
			BytePerPixDETC = 0;
		} else if (Is444_16) {
			BytePerPixDETY = 2;
			BytePerPixDETC = 0;
		} else if (Is444_8) {
			BytePerPixDETY = 1;
			BytePerPixDETC = 0;
		} else if (Is420_8) {
			BytePerPixDETY = 1;
			BytePerPixDETC = 2;
		} else {
			BytePerPixDETY = 4.0 / 3.0;
			BytePerPixDETC = 8.0 / 3.0;
		}

		/* Geometry of one 256-byte read block. */
		if (IsLumaOnly) {
			if (IsLinear)
				Read256BytesBlockHeightY = 1;
			else if (Is444_64)
				Read256BytesBlockHeightY = 4;
			else if (Is444_32 || Is444_16)
				Read256BytesBlockHeightY = 8;
			else
				Read256BytesBlockHeightY = 16;
			Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
					/ Read256BytesBlockHeightY;
			Read256BytesBlockHeightC = 0;
			Read256BytesBlockWidthC = 0;
		} else {
			if (IsLinear) {
				Read256BytesBlockHeightY = 1;
				Read256BytesBlockHeightC = 1;
			} else if (Is420_8) {
				Read256BytesBlockHeightY = 16;
				Read256BytesBlockHeightC = 8;
			} else {
				Read256BytesBlockHeightY = 8;
				Read256BytesBlockHeightC = 8;
			}
			Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
					/ Read256BytesBlockHeightY;
			Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2)
					/ Read256BytesBlockHeightC;
		}

		/* A non-horizontal scan swaps the roles of block height and width. */
		if (IsHorz) {
			MaximumSwathHeightY = Read256BytesBlockHeightY;
			MaximumSwathHeightC = Read256BytesBlockHeightC;
		} else {
			MaximumSwathHeightY = Read256BytesBlockWidthY;
			MaximumSwathHeightC = Read256BytesBlockWidthC;
		}

		/* The minimum swath height is either the maximum or half of it. */
		MinimumSwathHeightY = MaximumSwathHeightY;
		MinimumSwathHeightC = MaximumSwathHeightC;
		if (IsLumaOnly) {
			if (!(IsLinear
					|| (Is444_64 && IsSSwizzle && IsHorz)
					|| (Is444_8 && !IsHorz)))
				MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
		} else if (!IsLinear && IsHorz) {
			if (Is420_8)
				MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
			else if (Is420_10)
				MinimumSwathHeightC = MaximumSwathHeightC / 2.0;
		}

		if (IsHorz)
			SwathWidth = mode_lib->vba.ViewportWidth[k];
		else
			SwathWidth = mode_lib->vba.ViewportHeight[k];

		/*
		 * ODM combine applies if this plane has it enabled, or if the plane
		 * selected by BlendingAndTiming[k] has it enabled.
		 */
		if (mode_lib->vba.ODMCombineEnabled[k] == true)
			MainPlaneDoesODMCombine = true;
		for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
			if (mode_lib->vba.BlendingAndTiming[k] == j
					&& mode_lib->vba.ODMCombineEnabled[j] == true)
				MainPlaneDoesODMCombine = true;
		}

		if (MainPlaneDoesODMCombine == true) {
			SwathWidth = dml_min(
					SwathWidth,
					mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]);
		} else {
			SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k];
		}

		/* Size of one maximum-height swath, rounded up to the granularity. */
		SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY;
		RoundedUpMaxSwathSizeBytesY = (dml_ceil(
				(double) (SwathWidth - 1),
				SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY
				* MaximumSwathHeightY;
		if (Is420_10) {
			RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256)
					+ 256;
		}

		if (MaximumSwathHeightC > 0) {
			SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2)
					/ MaximumSwathHeightC;
			RoundedUpMaxSwathSizeBytesC = (dml_ceil(
					(double) (SwathWidth / 2.0 - 1),
					SwathWidthGranularityC) + SwathWidthGranularityC)
					* BytePerPixDETC * MaximumSwathHeightC;
			if (Is420_10) {
				RoundedUpMaxSwathSizeBytesC = dml_ceil(
						RoundedUpMaxSwathSizeBytesC,
						256) + 256;
			}
		} else {
			RoundedUpMaxSwathSizeBytesC = 0.0;
		}

		/* Use the maximum swath only if luma + chroma fit in half of the DET. */
		if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
				<= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
			mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
			mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
		} else {
			mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY;
			mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC;
		}

		CalculateDETBufferSize(
				mode_lib->vba.DETBufferSizeInKByte,
				mode_lib->vba.SwathHeightY[k],
				mode_lib->vba.SwathHeightC[k],
				&mode_lib->vba.DETBufferSizeY[k],
				&mode_lib->vba.DETBufferSizeC[k]);
	}
}

/*
 * CalculateTWait - wait time selected by the prefetch mode.
 *
 * PrefetchMode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                     SREnterPlusExitTime, UrgentLatency)
 * PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency)
 * otherwise:      UrgentLatency
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	if (PrefetchMode == 0)
		return dml_max(
				DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));

	if (PrefetchMode == 1)
		return dml_max(SREnterPlusExitTime, UrgentLatency);

	return UrgentLatency;
}

/*
 * CalculateRemoteSurfaceFlipDelay - XFC remote surface flip delay.
 *
 * Outputs:
 *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
 *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
 *   *TslvChk            = XFCSlvChunkSize / (drain rate * (1 + overhead / 100))
 *
 * Returns 2 * XFCBusTransportTime + slave setup time + TCalc + TWait
 * + *TslvChk + *TInitXFill.  Every intermediate value is logged through
 * dml_print().
 */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk)
{
	/* XFCFillBWOverhead is a percentage; this is the matching multiplier. */
	const double FillBWFactor = 1 + XFCFillBWOverhead / 100;
	double SlaveSetupTime;
	double AverageFillRate;
	double FlipDelay;

	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
	SlaveSetupTime = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset;
	*TInitXFill = XFCXBUFLatencyTolerance / FillBWFactor;
	AverageFillRate = *SrcActiveDrainRate * FillBWFactor;
	*TslvChk = XFCSlvChunkSize / AverageFillRate;

	dml_print(
			"DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n",
			*SrcActiveDrainRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AverageFillRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);

	// TODO: This doesn't seem to match programming guide
	FlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait
			+ *TslvChk + *TInitXFill;
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", FlipDelay);
	return FlipDelay;
}

/*
 * CalculateWriteBackDelay - writeback delay from scaler ratios and tap counts.
 *
 * The luma delay is the larger of a horizontal-tap term and a vertical-tap
 * term.  For every pixel format other than dm_444_32 the same pair of terms is
 * evaluated for chroma (half-rate ratios, half destination width) and the
 * overall maximum is returned.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackLumaHTaps,
		unsigned int WritebackLumaVTaps,
		unsigned int WritebackChromaHTaps,
		unsigned int WritebackChromaVTaps,
		unsigned int WritebackDestinationWidth)
{
	const double LumaVScale = dml_ceil(1.0 / WritebackVRatio, 1);
	double Delay;

	Delay = dml_max(
			dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
			WritebackLumaVTaps * LumaVScale
					* dml_ceil(WritebackDestinationWidth / 4.0, 1)
				+ LumaVScale * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4));

	if (WritebackPixelFormat != dm_444_32) {
		const double ChromaVScale = dml_ceil(1 / (2 * WritebackVRatio), 1);
		const double ChromaDelay = dml_max(
				dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * WritebackHRatio),
				WritebackChromaVTaps * ChromaVScale
						* dml_ceil(WritebackDestinationWidth / 2.0 / 2.0, 1)
					+ ChromaVScale
						* (dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4));

		Delay = dml_max(Delay, ChromaDelay);
	}

	return Delay;
}

/*
 * CalculateActiveRowBandwidth - meta-row and dpte-row bandwidth of one plane.
 *
 * *meta_row_bw is 0 unless DCCEnable is set; *dpte_row_bw is 0 unless
 * GPUVMEnable is set.  Otherwise each is
 *   VRatio * bytes_per_row / (row_height * LineTime)
 * for luma, plus the same term at VRatio / 2 with the chroma row size and
 * height when the format is dm_420_8 or dm_420_10.
 */
static void CalculateActiveRowBandwidth(
		bool GPUVMEnable,
		enum source_format_class SourcePixelFormat,
		double VRatio,
		bool DCCEnable,
		double LineTime,
		unsigned int MetaRowByteLuma,
		unsigned int MetaRowByteChroma,
		unsigned int meta_row_height_luma,
		unsigned int meta_row_height_chroma,
		unsigned int PixelPTEBytesPerRowLuma,
		unsigned int PixelPTEBytesPerRowChroma,
		unsigned int dpte_row_height_luma,
		unsigned int dpte_row_height_chroma,
		double *meta_row_bw,
		double *dpte_row_bw)
{
	const bool HasChromaPlane = SourcePixelFormat == dm_420_8
			|| SourcePixelFormat == dm_420_10;

	if (DCCEnable != true) {
		*meta_row_bw = 0;
	} else if (HasChromaPlane) {
		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
				+ VRatio / 2 * MetaRowByteChroma
						/ (meta_row_height_chroma * LineTime);
	} else {
		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
	}

	if (GPUVMEnable != true) {
		*dpte_row_bw = 0;
	} else if (HasChromaPlane) {
		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
				+ VRatio / 2 * PixelPTEBytesPerRowChroma
						/ (dpte_row_height_chroma * LineTime);
	} else {
		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
	}
}

/*
 * CalculateFlipSchedule - immediate-flip timing and bandwidth for one pipe.
 *
 * Outputs:
 *   *DestinationLinesToRequestVMInImmediateFlip  - VM fetch time in lines,
 *       rounded up to a quarter line (0 when GPUVMEnable is false).
 *   *DestinationLinesToRequestRowInImmediateFlip - row fetch time in lines,
 *       rounded up to a quarter line (0 when neither GPUVM nor DCC is on).
 *   *final_flip_bw                 - bandwidth needed to meet those line counts.
 *   *ImmediateFlipSupportedForPipe - false when the VM lines reach 32, the row
 *       lines reach 16, or VM time + 2 * row time exceeds the shortest row time.
 */
static void CalculateFlipSchedule(
		struct display_mode_lib *mode_lib,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double UrgentExtraLatency,
		double UrgentLatency,
		unsigned int GPUVMMaxPageTableLevels,
		bool HostVMEnable,
		unsigned int HostVMMaxPageTableLevels,
		unsigned int HostVMCachedPageTableLevels,
		bool GPUVMEnable,
		double PDEAndMetaPTEBytesPerFrame,
		double MetaRowBytes,
		double DPTEBytesPerRow,
		double BandwidthAvailableForImmediateFlip,
		unsigned int TotImmediateFlipBytes,
		enum source_format_class SourcePixelFormat,
		double LineTime,
		double VRatio,
		double Tno_bw,
		bool DCCEnable,
		unsigned int dpte_row_height,
		unsigned int meta_row_height,
		unsigned int dpte_row_height_chroma,
		unsigned int meta_row_height_chroma,
		double *DestinationLinesToRequestVMInImmediateFlip,
		double *DestinationLinesToRequestRowInImmediateFlip,
		double *final_flip_bw,
		bool *ImmediateFlipSupportedForPipe)
{
	double min_row_time = 0.0;
	unsigned int HostVMDynamicLevels;
	double TimeForFetchingMetaPTEImmediateFlip;
	double TimeForFetchingRowInVBlankImmediateFlip;
	double ImmediateFlipBW;
	double HostVMInefficiencyFactor;

	if (GPUVMEnable == true && HostVMEnable == true) {
		HostVMInefficiencyFactor =
				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
						/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
	} else {
		HostVMInefficiencyFactor = 1;
		HostVMDynamicLevels = 0;
	}

	/* This pipe's share of the flip bandwidth, proportional to its bytes. */
	ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
			* BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;

	if (GPUVMEnable == true) {
		TimeForFetchingMetaPTEImmediateFlip = dml_max3(
				Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
				UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1),
				LineTime / 4.0);
	} else {
		TimeForFetchingMetaPTEImmediateFlip = 0;
	}
	*DestinationLinesToRequestVMInImmediateFlip =
			dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;

	if (GPUVMEnable == true || DCCEnable == true) {
		TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
				(MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW,
				UrgentLatency * (HostVMDynamicLevels + 1),
				LineTime / 4);
	} else {
		TimeForFetchingRowInVBlankImmediateFlip = 0;
	}
	*DestinationLinesToRequestRowInImmediateFlip =
			dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;

	/*
	 * Only divide by a line count that is known to be non-zero.  Without
	 * GPUVM the VM line count is exactly 0, and without GPUVM and DCC the
	 * row line count is 0 as well, so evaluating both terms unconditionally
	 * divides by zero and yields inf/NaN.
	 */
	if (GPUVMEnable == true) {
		*final_flip_bw = dml_max(
				PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor
						/ (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
				(MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor
						/ (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
	} else if (DCCEnable == true) {
		*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor
				/ (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
	} else {
		*final_flip_bw = 0;
	}

	/* Shortest time available to fetch one dpte/meta row. */
	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
		if (GPUVMEnable == true && DCCEnable != true) {
			min_row_time = dml_min(
					dpte_row_height * LineTime / VRatio,
					dpte_row_height_chroma * LineTime / (VRatio / 2));
		} else if (GPUVMEnable != true && DCCEnable == true) {
			min_row_time = dml_min(
					meta_row_height * LineTime / VRatio,
					meta_row_height_chroma * LineTime / (VRatio / 2));
		} else {
			min_row_time = dml_min4(
					dpte_row_height * LineTime / VRatio,
					meta_row_height * LineTime / VRatio,
					dpte_row_height_chroma * LineTime / (VRatio / 2),
					meta_row_height_chroma * LineTime / (VRatio / 2));
		}
	} else {
		if (GPUVMEnable == true && DCCEnable != true) {
			min_row_time = dpte_row_height * LineTime / VRatio;
		} else if (GPUVMEnable != true && DCCEnable == true) {
			min_row_time = meta_row_height * LineTime / VRatio;
		} else {
			min_row_time = dml_min(
					dpte_row_height * LineTime / VRatio,
					meta_row_height * LineTime / VRatio);
		}
	}

	*ImmediateFlipSupportedForPipe =
			!(*DestinationLinesToRequestVMInImmediateFlip >= 32
			|| *DestinationLinesToRequestRowInImmediateFlip >= 16
			|| TimeForFetchingMetaPTEImmediateFlip
					+ 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time);
}

/*
 * Return the first entry of ValidBPP[] (listed highest first) that DecimalBPP
 * can carry and that matches DesiredBPP (0 = no preference), or BPP_INVALID
 * when no entry qualifies.
 */
static unsigned int TruncToDiscreteBPP(
		double DecimalBPP,
		double DesiredBPP,
		const unsigned int *ValidBPP,
		unsigned int NumValidBPP)
{
	unsigned int i;

	for (i = 0; i < NumValidBPP; i++) {
		if (DecimalBPP >= ValidBPP[i]
				&& (DesiredBPP == 0 || DesiredBPP == ValidBPP[i]))
			return ValidBPP[i];
	}
	return BPP_INVALID;
}

/*
 * DSC output rate inside [MinBPP, MaxBPP].  With no preference
 * (DesiredBPP == 0) DecimalBPP is clamped to MaxBPP or floored to 1/16 bpp;
 * otherwise DesiredBPP is returned when it is in range and not above
 * DecimalBPP.  Returns BPP_INVALID when the request cannot be met.
 */
static unsigned int TruncToDSCBPP(
		double DecimalBPP,
		double DesiredBPP,
		double MinBPP,
		double MaxBPP)
{
	if (DesiredBPP == 0) {
		if (DecimalBPP < MinBPP)
			return BPP_INVALID;
		if (DecimalBPP >= MaxBPP)
			return (unsigned int) MaxBPP;
		return (unsigned int) (dml_floor(16 * DecimalBPP, 1) / 16.0);
	}

	if (DecimalBPP < MinBPP
			|| DesiredBPP < MinBPP
			|| DesiredBPP > MaxBPP
			|| DecimalBPP < DesiredBPP)
		return BPP_INVALID;

	return (unsigned int) DesiredBPP;
}

/*
 * TruncToValidBPP - map an achievable bpp onto a rate valid for the output.
 *
 * DecimalBPP is the bpp that is available, DesiredBPP the requested rate
 * (0 = pick the highest valid one).  Returns the chosen rate or BPP_INVALID.
 *
 * HDMI and non-DSC outputs pick from a fixed table per Format; the HDMI
 * fall-through case (neither dm_420 nor dm_444) compares DecimalBPP / 1.5
 * against the table.  DSC outputs use a range whose upper bound is
 * 1.5x / 2x / 3x DSCInputBitPerComponent minus 1/16 for dm_420 / dm_n422 /
 * everything else.
 *
 * NOTE(review): the return type is unsigned int, so the fractional DSC
 * results computed here (1/16 bpp steps) are truncated to whole bits on
 * return.  Returning them exactly would need a double return type, which
 * changes the function's signature - confirm with the callers before
 * touching it.
 */
static unsigned int TruncToValidBPP(
		double DecimalBPP,
		double DesiredBPP,
		bool DSCEnabled,
		enum output_encoder_class Output,
		enum output_format_class Format,
		unsigned int DSCInputBitPerComponent)
{
	static const unsigned int BPP420[] = { 18, 15, 12 };
	static const unsigned int BPP422[] = { 24, 20, 16 };
	static const unsigned int BPP444[] = { 36, 30, 24 };
	static const unsigned int BPP444HDMI[] = { 36, 30, 24, 18 };

	if (Output == dm_hdmi) {
		if (Format == dm_420)
			return TruncToDiscreteBPP(DecimalBPP, DesiredBPP,
					BPP420, sizeof(BPP420) / sizeof(BPP420[0]));
		if (Format == dm_444)
			return TruncToDiscreteBPP(DecimalBPP, DesiredBPP,
					BPP444HDMI, sizeof(BPP444HDMI) / sizeof(BPP444HDMI[0]));
		return TruncToDiscreteBPP(DecimalBPP / 1.5, DesiredBPP,
				BPP422, sizeof(BPP422) / sizeof(BPP422[0]));
	}

	if (DSCEnabled) {
		if (Format == dm_420)
			return TruncToDSCBPP(DecimalBPP, DesiredBPP, 6,
					1.5 * DSCInputBitPerComponent - 1.0 / 16.0);
		if (Format == dm_n422)
			return TruncToDSCBPP(DecimalBPP, DesiredBPP, 7,
					2 * DSCInputBitPerComponent - 1.0 / 16.0);
		return TruncToDSCBPP(DecimalBPP, DesiredBPP, 8,
				3 * DSCInputBitPerComponent - 1.0 / 16.0);
	}

	if (Format == dm_420)
		return TruncToDiscreteBPP(DecimalBPP, DesiredBPP,
				BPP420, sizeof(BPP420) / sizeof(BPP420[0]));
	if (Format == dm_s422 || Format == dm_n422)
		return TruncToDiscreteBPP(DecimalBPP, DesiredBPP,
				BPP422, sizeof(BPP422) / sizeof(BPP422[0]));
	return TruncToDiscreteBPP(DecimalBPP, DesiredBPP,
			BPP444, sizeof(BPP444) / sizeof(BPP444[0]));
}

void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ++{ ++ struct vba_vars_st *locals = &mode_lib->vba; ++ ++ int i; ++ unsigned int j, k, m; ++ ++ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ ++ ++ /*Scale Ratio, taps Support Check*/ ++ ++ mode_lib->vba.ScaleRatioAndTapsSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.ScalerEnabled[k] == false ++ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ &&
mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) ++ || mode_lib->vba.HRatio[k] != 1.0 ++ || mode_lib->vba.htaps[k] != 1.0 ++ || mode_lib->vba.VRatio[k] != 1.0 ++ || mode_lib->vba.vtaps[k] != 1.0)) { ++ mode_lib->vba.ScaleRatioAndTapsSupport = false; ++ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 ++ || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 ++ || (mode_lib->vba.htaps[k] > 1.0 ++ && (mode_lib->vba.htaps[k] % 2) == 1) ++ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio ++ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio ++ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] ++ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] ++ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 ++ && (mode_lib->vba.HRatio[k] / 2.0 ++ > mode_lib->vba.HTAPsChroma[k] ++ || mode_lib->vba.VRatio[k] / 2.0 ++ > mode_lib->vba.VTAPsChroma[k]))) { ++ mode_lib->vba.ScaleRatioAndTapsSupport = false; ++ } ++ } ++ /*Source Format, Pixel Format and Scan Support Check*/ ++ ++ mode_lib->vba.SourceFormatPixelAndScanSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear ++ && mode_lib->vba.SourceScan[k] != dm_horz) ++ || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d ++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) ++ && mode_lib->vba.SourcePixelFormat[k] != 
dm_444_64) ++ || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x ++ && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_8 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_10)) ++ || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_gfx7_2d_thin_lvp) ++ && !((mode_lib->vba.SourcePixelFormat[k] ++ == dm_444_64 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_444_32) ++ && mode_lib->vba.SourceScan[k] ++ == dm_horz ++ && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp ++ == true ++ && mode_lib->vba.DCCEnable[k] ++ == false)) ++ || (mode_lib->vba.DCCEnable[k] == true ++ && (mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_linear ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_8 ++ || mode_lib->vba.SourcePixelFormat[k] ++ == dm_420_10)))) { ++ mode_lib->vba.SourceFormatPixelAndScanSupport = false; ++ } ++ } ++ /*Bandwidth Support Check*/ ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { ++ locals->BytePerPixelInDETY[k] = 8.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { ++ locals->BytePerPixelInDETY[k] = 4.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { ++ locals->BytePerPixelInDETY[k] = 2.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { ++ locals->BytePerPixelInDETY[k] = 1.0; ++ locals->BytePerPixelInDETC[k] = 0.0; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { ++ locals->BytePerPixelInDETY[k] = 1.0; ++ locals->BytePerPixelInDETC[k] = 2.0; ++ } else { ++ locals->BytePerPixelInDETY[k] = 4.0 / 3; ++ locals->BytePerPixelInDETC[k] = 8.0 / 3; ++ } ++ if (mode_lib->vba.SourceScan[k] == dm_horz) { ++ 
locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; ++ } else { ++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; ++ locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) ++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; ++ locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true ++ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { ++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) * 4.0; ++ } else if (mode_lib->vba.WritebackEnable[k] == true ++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { ++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) * 3.0; ++ } else if (mode_lib->vba.WritebackEnable[k] == true) { ++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] ++ * mode_lib->vba.WritebackDestinationHeight[k] ++ / (mode_lib->vba.WritebackSourceHeight[k] ++ * mode_lib->vba.HTotal[k] ++ / mode_lib->vba.PixelClock[k]) * 1.5; ++ } else { ++ locals->WriteBandwidth[k] = 0.0; ++ } ++ } ++ mode_lib->vba.DCCEnabledInAnyPlane = false; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if 
(mode_lib->vba.DCCEnable[k] == true) { ++ mode_lib->vba.DCCEnabledInAnyPlane = true; ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->IdealSDPPortBandwidthPerState[i] = dml_min3( ++ mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], ++ mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels ++ * mode_lib->vba.DRAMChannelWidth, ++ mode_lib->vba.FabricClockPerState[i] ++ * mode_lib->vba.FabricDatapathToDCNDataReturn); ++ if (mode_lib->vba.HostVMEnable == false) { ++ locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] ++ * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0; ++ } else { ++ locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] ++ * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0; ++ } ++ } ++ /*Writeback Latency support check*/ ++ ++ mode_lib->vba.WritebackLatencySupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { ++ if (locals->WriteBandwidth[k] ++ > (mode_lib->vba.WritebackInterfaceLumaBufferSize ++ + mode_lib->vba.WritebackInterfaceChromaBufferSize) ++ / mode_lib->vba.WritebackLatency) { ++ mode_lib->vba.WritebackLatencySupport = false; ++ } ++ } else { ++ if (locals->WriteBandwidth[k] ++ > 1.5 ++ * dml_min( ++ mode_lib->vba.WritebackInterfaceLumaBufferSize, ++ 2.0 ++ * mode_lib->vba.WritebackInterfaceChromaBufferSize) ++ / mode_lib->vba.WritebackLatency) { ++ mode_lib->vba.WritebackLatencySupport = false; ++ } ++ } ++ } ++ } ++ /*Re-ordering Buffer Support Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = ++ (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] ++ + 
dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly) ++ * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; ++ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] ++ > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { ++ locals->ROBSupport[i] = true; ++ } else { ++ locals->ROBSupport[i] = false; ++ } ++ } ++ /*Writeback Mode Support Check*/ ++ ++ mode_lib->vba.TotalNumberOfActiveWriteback = 0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) ++ mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; ++ mode_lib->vba.TotalNumberOfActiveWriteback = ++ mode_lib->vba.TotalNumberOfActiveWriteback ++ + mode_lib->vba.ActiveWritebacksPerPlane[k]; ++ } ++ } ++ mode_lib->vba.WritebackModeSupport = true; ++ if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { ++ mode_lib->vba.WritebackModeSupport = false; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true ++ && mode_lib->vba.Writeback10bpc420Supported != true ++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { ++ mode_lib->vba.WritebackModeSupport = false; ++ } ++ } ++ /*Writeback Scale Ratio and Taps Support Check*/ ++ ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false ++ && (mode_lib->vba.WritebackHRatio[k] != 1.0 ++ || mode_lib->vba.WritebackVRatio[k] != 1.0)) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ if (mode_lib->vba.WritebackHRatio[k] > 
mode_lib->vba.WritebackMaxHSCLRatio ++ || mode_lib->vba.WritebackVRatio[k] ++ > mode_lib->vba.WritebackMaxVSCLRatio ++ || mode_lib->vba.WritebackHRatio[k] ++ < mode_lib->vba.WritebackMinHSCLRatio ++ || mode_lib->vba.WritebackVRatio[k] ++ < mode_lib->vba.WritebackMinVSCLRatio ++ || mode_lib->vba.WritebackLumaHTaps[k] ++ > mode_lib->vba.WritebackMaxHSCLTaps ++ || mode_lib->vba.WritebackLumaVTaps[k] ++ > mode_lib->vba.WritebackMaxVSCLTaps ++ || mode_lib->vba.WritebackHRatio[k] ++ > mode_lib->vba.WritebackLumaHTaps[k] ++ || mode_lib->vba.WritebackVRatio[k] ++ > mode_lib->vba.WritebackLumaVTaps[k] ++ || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 ++ && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) ++ == 1)) ++ || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 ++ && (mode_lib->vba.WritebackChromaHTaps[k] ++ > mode_lib->vba.WritebackMaxHSCLTaps ++ || mode_lib->vba.WritebackChromaVTaps[k] ++ > mode_lib->vba.WritebackMaxVSCLTaps ++ || 2.0 ++ * mode_lib->vba.WritebackHRatio[k] ++ > mode_lib->vba.WritebackChromaHTaps[k] ++ || 2.0 ++ * mode_lib->vba.WritebackVRatio[k] ++ > mode_lib->vba.WritebackChromaVTaps[k] ++ || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 ++ && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ if (mode_lib->vba.WritebackVRatio[k] < 1.0) { ++ mode_lib->vba.WritebackLumaVExtra = ++ dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); ++ } else { ++ mode_lib->vba.WritebackLumaVExtra = -1; ++ } ++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 ++ && mode_lib->vba.WritebackLumaVTaps[k] ++ > (mode_lib->vba.WritebackLineBufferLumaBufferSize ++ + mode_lib->vba.WritebackLineBufferChromaBufferSize) ++ / 3.0 ++ / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackLumaVExtra) ++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 ++ && mode_lib->vba.WritebackLumaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferLumaBufferSize ++ * 8.0 / 
10.0 / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackLumaVExtra) ++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 ++ && mode_lib->vba.WritebackLumaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferLumaBufferSize ++ * 8.0 / 10.0 ++ / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackLumaVExtra)) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { ++ mode_lib->vba.WritebackChromaVExtra = 0.0; ++ } else { ++ mode_lib->vba.WritebackChromaVExtra = -1; ++ } ++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 ++ && mode_lib->vba.WritebackChromaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferChromaBufferSize ++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackChromaVExtra) ++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 ++ && mode_lib->vba.WritebackChromaVTaps[k] ++ > mode_lib->vba.WritebackLineBufferChromaBufferSize ++ * 8.0 / 10.0 ++ / mode_lib->vba.WritebackDestinationWidth[k] ++ - mode_lib->vba.WritebackChromaVExtra)) { ++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; ++ } ++ } ++ } ++ /*Maximum DISPCLK/DPPCLK Support check*/ ++ ++ mode_lib->vba.WritebackRequiredDISPCLK = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ mode_lib->vba.WritebackRequiredDISPCLK = ++ dml_max( ++ mode_lib->vba.WritebackRequiredDISPCLK, ++ CalculateWriteBackDISPCLK( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k], ++ mode_lib->vba.HTotal[k], ++ mode_lib->vba.WritebackChromaLineBufferWidth)); ++ } ++ } ++ for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.HRatio[k] > 1.0) { ++ locals->PSCL_FACTOR[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / dml_ceil( ++ mode_lib->vba.htaps[k] ++ / 6.0, ++ 1.0)); ++ } else { ++ locals->PSCL_FACTOR[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ if (locals->BytePerPixelInDETC[k] == 0.0) { ++ locals->PSCL_FACTOR_CHROMA[k] = 0.0; ++ locals->MinDPPCLKUsingSingleDPP[k] = ++ mode_lib->vba.PixelClock[k] ++ * dml_max3( ++ mode_lib->vba.vtaps[k] / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k]), ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / locals->PSCL_FACTOR[k], ++ 1.0); ++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) ++ && locals->MinDPPCLKUsingSingleDPP[k] ++ < 2.0 * mode_lib->vba.PixelClock[k]) { ++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0 ++ * mode_lib->vba.PixelClock[k]; ++ } ++ } else { ++ if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { ++ locals->PSCL_FACTOR_CHROMA[k] = ++ dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput ++ * mode_lib->vba.HRatio[k] ++ / 2.0 ++ / dml_ceil( ++ mode_lib->vba.HTAPsChroma[k] ++ / 6.0, ++ 1.0)); ++ } else { ++ locals->PSCL_FACTOR_CHROMA[k] = dml_min( ++ mode_lib->vba.MaxDCHUBToPSCLThroughput, ++ mode_lib->vba.MaxPSCLToLBThroughput); ++ } ++ locals->MinDPPCLKUsingSingleDPP[k] = ++ mode_lib->vba.PixelClock[k] ++ * dml_max5( ++ mode_lib->vba.vtaps[k] / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k]), ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / locals->PSCL_FACTOR[k], ++ mode_lib->vba.VTAPsChroma[k] ++ / 6.0 ++ * dml_min( ++ 1.0, ++ mode_lib->vba.HRatio[k] ++ / 2.0), ++ mode_lib->vba.HRatio[k] ++ * mode_lib->vba.VRatio[k] ++ / 4.0 ++ / locals->PSCL_FACTOR_CHROMA[k], ++ 1.0); ++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 ++ || 
mode_lib->vba.HTAPsChroma[k] > 6.0 ++ || mode_lib->vba.VTAPsChroma[k] > 6.0) ++ && locals->MinDPPCLKUsingSingleDPP[k] ++ < 2.0 * mode_lib->vba.PixelClock[k]) { ++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0 ++ * mode_lib->vba.PixelClock[k]; ++ } ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ Calculate256BBlockSizes( ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0), ++ dml_ceil(locals->BytePerPixelInDETC[k], 2.0), ++ &locals->Read256BlockHeightY[k], ++ &locals->Read256BlockHeightC[k], ++ &locals->Read256BlockWidthY[k], ++ &locals->Read256BlockWidthC[k]); ++ if (mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; ++ locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; ++ } else { ++ locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; ++ locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; ++ } ++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 ++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear ++ || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 ++ && (mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_4kb_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_4kb_s_x ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s_t ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_64kb_s_x ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_var_s ++ || mode_lib->vba.SurfaceTiling[k] ++ == dm_sw_var_s_x) ++ && mode_lib->vba.SourceScan[k] == dm_horz)) { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ } else { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] ++ / 2.0; ++ } ++ locals->MinSwathHeightC[k] = 
locals->MaxSwathHeightC[k]; ++ } else { ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 ++ && mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] ++ / 2.0; ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 ++ && mode_lib->vba.SourceScan[k] == dm_horz) { ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] ++ / 2.0; ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ } else { ++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; ++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; ++ } ++ } ++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { ++ mode_lib->vba.MaximumSwathWidthSupport = 8192.0; ++ } else { ++ mode_lib->vba.MaximumSwathWidthSupport = 5120.0; ++ } ++ mode_lib->vba.MaximumSwathWidthInDETBuffer = ++ dml_min( ++ mode_lib->vba.MaximumSwathWidthSupport, ++ mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 ++ / (locals->BytePerPixelInDETY[k] ++ * locals->MinSwathHeightY[k] ++ + locals->BytePerPixelInDETC[k] ++ / 2.0 ++ * locals->MinSwathHeightC[k])); ++ if (locals->BytePerPixelInDETC[k] == 0.0) { ++ mode_lib->vba.MaximumSwathWidthInLineBuffer = ++ mode_lib->vba.LineBufferSize ++ * dml_max(mode_lib->vba.HRatio[k], 1.0) ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.vtaps[k] ++ + dml_max( ++ dml_ceil( ++ mode_lib->vba.VRatio[k], ++ 1.0) ++ - 2, ++ 0.0)); ++ } else { ++ mode_lib->vba.MaximumSwathWidthInLineBuffer = ++ dml_min( ++ mode_lib->vba.LineBufferSize ++ * dml_max( ++ mode_lib->vba.HRatio[k], ++ 1.0) ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.vtaps[k] ++ + dml_max( ++ dml_ceil( ++ mode_lib->vba.VRatio[k], ++ 1.0) ++ - 2, ++ 0.0)), ++ 2.0 * mode_lib->vba.LineBufferSize ++ * dml_max( ++ 
mode_lib->vba.HRatio[k] ++ / 2.0, ++ 1.0) ++ / mode_lib->vba.LBBitPerPixel[k] ++ / (mode_lib->vba.VTAPsChroma[k] ++ + dml_max( ++ dml_ceil( ++ mode_lib->vba.VRatio[k] ++ / 2.0, ++ 1.0) ++ - 2, ++ 0.0))); ++ } ++ locals->MaximumSwathWidth[k] = dml_min( ++ mode_lib->vba.MaximumSwathWidthInDETBuffer, ++ mode_lib->vba.MaximumSwathWidthInLineBuffer); ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( ++ mode_lib->vba.MaxDispclk[i], ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( ++ mode_lib->vba.MaxDppclk[i], ++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed); ++ locals->RequiredDISPCLK[i][j] = 0.0; ++ locals->DISPCLK_DPPCLK_Support[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = ++ mode_lib->vba.PixelClock[k] ++ * (1.0 ++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading ++ / 100.0) ++ * (1.0 ++ + mode_lib->vba.DISPCLKRampingMargin ++ / 100.0); ++ if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] ++ && i == mode_lib->vba.soc.num_states) ++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ ++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); ++ if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] ++ && i == mode_lib->vba.soc.num_states) ++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ if (mode_lib->vba.ODMCapability == false || 
mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { ++ locals->ODMCombineEnablePerState[i][k] = false; ++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; ++ } else { ++ locals->ODMCombineEnablePerState[i][k] = true; ++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; ++ } ++ if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity ++ && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] ++ && locals->ODMCombineEnablePerState[i][k] == false) { ++ locals->NoOfDPP[i][j][k] = 1; ++ locals->RequiredDPPCLK[i][j][k] = ++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ } else { ++ locals->NoOfDPP[i][j][k] = 2; ++ locals->RequiredDPPCLK[i][j][k] = ++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; ++ } ++ locals->RequiredDISPCLK[i][j] = dml_max( ++ locals->RequiredDISPCLK[i][j], ++ mode_lib->vba.PlaneRequiredDISPCLK); ++ if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) ++ || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { ++ locals->DISPCLK_DPPCLK_Support[i][j] = false; ++ } ++ } ++ locals->TotalNumberOfActiveDPP[i][j] = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) ++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; ++ if (j == 1) { ++ while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP ++ && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { ++ double BWOfNonSplitPlaneOfMaximumBandwidth; ++ unsigned int 
NumberOfNonSplitPlaneOfMaximumBandwidth; ++ ++ BWOfNonSplitPlaneOfMaximumBandwidth = 0; ++ NumberOfNonSplitPlaneOfMaximumBandwidth = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { ++ BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; ++ NumberOfNonSplitPlaneOfMaximumBandwidth = k; ++ } ++ } ++ locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; ++ locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = ++ locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] ++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; ++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; ++ } ++ } ++ if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { ++ locals->RequiredDISPCLK[i][j] = 0.0; ++ locals->DISPCLK_DPPCLK_Support[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->ODMCombineEnablePerState[i][k] = false; ++ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { ++ locals->NoOfDPP[i][j][k] = 1; ++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ } else { ++ locals->NoOfDPP[i][j][k] = 2; ++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; ++ } ++ if (i != mode_lib->vba.soc.num_states) { ++ mode_lib->vba.PlaneRequiredDISPCLK = ++ mode_lib->vba.PixelClock[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); ++ } else { ++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] ++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); ++ } ++ locals->RequiredDISPCLK[i][j] = dml_max( ++ 
locals->RequiredDISPCLK[i][j], ++ mode_lib->vba.PlaneRequiredDISPCLK); ++ if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) ++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity ++ || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) ++ locals->DISPCLK_DPPCLK_Support[i][j] = false; ++ } ++ locals->TotalNumberOfActiveDPP[i][j] = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) ++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; ++ } ++ locals->RequiredDISPCLK[i][j] = dml_max( ++ locals->RequiredDISPCLK[i][j], ++ mode_lib->vba.WritebackRequiredDISPCLK); ++ if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity ++ < mode_lib->vba.WritebackRequiredDISPCLK) { ++ locals->DISPCLK_DPPCLK_Support[i][j] = false; ++ } ++ } ++ } ++ /*Viewport Size Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->ViewportSizeSupport[i] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->ODMCombineEnablePerState[i][k] == true) { ++ if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) ++ > locals->MaximumSwathWidth[k]) { ++ locals->ViewportSizeSupport[i] = false; ++ } ++ } else { ++ if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { ++ locals->ViewportSizeSupport[i] = false; ++ } ++ } ++ } ++ } ++ /*Total Available Pipes Support Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) ++ locals->TotalAvailablePipesSupport[i][j] = true; ++ else ++ locals->TotalAvailablePipesSupport[i][j] = false; ++ } ++ } ++ /*Total Available OTG Support Check*/ ++ ++ mode_lib->vba.TotalNumberOfActiveOTG = 0.0; ++ for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG ++ + 1.0; ++ } ++ } ++ if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { ++ mode_lib->vba.NumberOfOTGSupport = true; ++ } else { ++ mode_lib->vba.NumberOfOTGSupport = false; ++ } ++ /*Display IO and DSC Support Check*/ ++ ++ mode_lib->vba.NonsupportedDSCInputBPC = false; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 ++ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 ++ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { ++ mode_lib->vba.NonsupportedDSCInputBPC = true; ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->RequiresDSC[i][k] = 0; ++ locals->RequiresFEC[i][k] = 0; ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if (mode_lib->vba.Output[k] == dm_hdmi) { ++ locals->RequiresDSC[i][k] = 0; ++ locals->RequiresFEC[i][k] = 0; ++ locals->OutputBppPerState[i][k] = TruncToValidBPP( ++ dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ false, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ } else if (mode_lib->vba.Output[k] == dm_dp ++ || mode_lib->vba.Output[k] == dm_edp) { ++ if (mode_lib->vba.Output[k] == dm_edp) { ++ mode_lib->vba.EffectiveFECOverhead = 0.0; ++ } else { ++ mode_lib->vba.EffectiveFECOverhead = ++ mode_lib->vba.FECOverhead; ++ } ++ if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { ++ mode_lib->vba.Outbpp = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ false, ++ 
mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ mode_lib->vba.OutbppDSC = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ true, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ if (mode_lib->vba.DSCEnabled[k] == true) { ++ locals->RequiresDSC[i][k] = true; ++ if (mode_lib->vba.Output[k] == dm_dp) { ++ locals->RequiresFEC[i][k] = true; ++ } else { ++ locals->RequiresFEC[i][k] = false; ++ } ++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; ++ } else { ++ locals->RequiresDSC[i][k] = false; ++ locals->RequiresFEC[i][k] = false; ++ } ++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; ++ } ++ if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { ++ mode_lib->vba.Outbpp = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ false, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ mode_lib->vba.OutbppDSC = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ true, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ if (mode_lib->vba.DSCEnabled[k] == true) { ++ locals->RequiresDSC[i][k] = true; ++ if (mode_lib->vba.Output[k] == dm_dp) { ++ locals->RequiresFEC[i][k] = true; ++ } else { ++ locals->RequiresFEC[i][k] = false; ++ } ++ mode_lib->vba.Outbpp = 
mode_lib->vba.OutbppDSC; ++ } else { ++ locals->RequiresDSC[i][k] = false; ++ locals->RequiresFEC[i][k] = false; ++ } ++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; ++ } ++ if (mode_lib->vba.Outbpp == BPP_INVALID ++ && mode_lib->vba.PHYCLKPerState[i] ++ >= 810.0) { ++ mode_lib->vba.Outbpp = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ false, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ mode_lib->vba.OutbppDSC = TruncToValidBPP( ++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 ++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, ++ mode_lib->vba.ForcedOutputLinkBPP[k], ++ true, ++ mode_lib->vba.Output[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.DSCInputBitPerComponent[k]); ++ if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { ++ locals->RequiresDSC[i][k] = true; ++ if (mode_lib->vba.Output[k] == dm_dp) { ++ locals->RequiresFEC[i][k] = true; ++ } else { ++ locals->RequiresFEC[i][k] = false; ++ } ++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; ++ } else { ++ locals->RequiresDSC[i][k] = false; ++ locals->RequiresFEC[i][k] = false; ++ } ++ locals->OutputBppPerState[i][k] = ++ mode_lib->vba.Outbpp; ++ } ++ } ++ } else { ++ locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; ++ } ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->DIOSupport[i] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->OutputBppPerState[i][k] == BPP_INVALID ++ || (mode_lib->vba.OutputFormat[k] == dm_420 ++ && mode_lib->vba.Interlace[k] == true ++ && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) { ++ locals->DIOSupport[i] = false; ++ } ++ } ++ } ++ for (i = 0; i 
<= mode_lib->vba.soc.num_states; i++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->DSCCLKRequiredMoreThanSupported[i] = false; ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if ((mode_lib->vba.Output[k] == dm_dp ++ || mode_lib->vba.Output[k] == dm_edp)) { ++ if (mode_lib->vba.OutputFormat[k] == dm_420 ++ || mode_lib->vba.OutputFormat[k] ++ == dm_n422) { ++ mode_lib->vba.DSCFormatFactor = 2; ++ } else { ++ mode_lib->vba.DSCFormatFactor = 1; ++ } ++ if (locals->RequiresDSC[i][k] == true) { ++ if (locals->ODMCombineEnablePerState[i][k] ++ == true) { ++ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor ++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { ++ locals->DSCCLKRequiredMoreThanSupported[i] = ++ true; ++ } ++ } else { ++ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor ++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { ++ locals->DSCCLKRequiredMoreThanSupported[i] = ++ true; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ locals->NotEnoughDSCUnits[i] = false; ++ mode_lib->vba.TotalDSCUnitsRequired = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->RequiresDSC[i][k] == true) { ++ if (locals->ODMCombineEnablePerState[i][k] == true) { ++ mode_lib->vba.TotalDSCUnitsRequired = ++ mode_lib->vba.TotalDSCUnitsRequired + 2.0; ++ } else { ++ mode_lib->vba.TotalDSCUnitsRequired = ++ mode_lib->vba.TotalDSCUnitsRequired + 1.0; ++ } ++ } ++ } ++ if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { ++ locals->NotEnoughDSCUnits[i] = true; ++ } ++ } ++ /*DSC Delay per state*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] != k) { ++ mode_lib->vba.slices = 0; ++ } else 
if (locals->RequiresDSC[i][k] == 0 ++ || locals->RequiresDSC[i][k] == false) { ++ mode_lib->vba.slices = 0; ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { ++ mode_lib->vba.slices = dml_ceil( ++ mode_lib->vba.PixelClockBackEnd[k] / 400.0, ++ 4.0); ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { ++ mode_lib->vba.slices = 8.0; ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { ++ mode_lib->vba.slices = 4.0; ++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { ++ mode_lib->vba.slices = 2.0; ++ } else { ++ mode_lib->vba.slices = 1.0; ++ } ++ if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE ++ || locals->OutputBppPerState[i][k] == BPP_INVALID) { ++ mode_lib->vba.bpp = 0.0; ++ } else { ++ mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; ++ } ++ if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { ++ if (locals->ODMCombineEnablePerState[i][k] == false) { ++ locals->DSCDelayPerState[i][k] = ++ dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ mode_lib->vba.bpp, ++ dml_ceil( ++ mode_lib->vba.HActive[k] ++ / mode_lib->vba.slices, ++ 1.0), ++ mode_lib->vba.slices, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay( ++ mode_lib->vba.OutputFormat[k]); ++ } else { ++ locals->DSCDelayPerState[i][k] = ++ 2.0 * (dscceComputeDelay( ++ mode_lib->vba.DSCInputBitPerComponent[k], ++ mode_lib->vba.bpp, ++ dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), ++ mode_lib->vba.slices / 2, ++ mode_lib->vba.OutputFormat[k]) ++ + dscComputeDelay(mode_lib->vba.OutputFormat[k])); ++ } ++ locals->DSCDelayPerState[i][k] = ++ locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; ++ } else { ++ locals->DSCDelayPerState[i][k] = 0.0; ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { ++ for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { ++ if 
(mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) ++ locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; ++ } ++ } ++ } ++ } ++ ++ //Prefetch Check ++ for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { ++ for (j = 0; j <= 1; ++j) { ++ locals->TotalNumberOfDCCActiveDPP[i][j] = 0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.DCCEnable[k] == true) ++ locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; ++ } ++ } ++ } ++ ++ mode_lib->vba.UrgentLatency = dml_max3( ++ mode_lib->vba.UrgentLatencyPixelDataOnly, ++ mode_lib->vba.UrgentLatencyPixelMixedWithVMData, ++ mode_lib->vba.UrgentLatencyVMDataOnly); ++ mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode( ++ mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, ++ &mode_lib->vba.MinPrefetchMode, ++ &mode_lib->vba.MaxPrefetchMode); ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; ++ locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; ++ if (locals->ODMCombineEnablePerState[i][k] == true) { ++ locals->SwathWidthYThisState[k] = ++ dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])); ++ } else { ++ locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; ++ } ++ mode_lib->vba.SwathWidthGranularityY = 256.0 ++ / dml_ceil(locals->BytePerPixelInDETY[k], 1.0) ++ / locals->MaxSwathHeightY[k]; ++ mode_lib->vba.RoundedUpMaxSwathSizeBytesY = ++ (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY) ++ + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { ++ 
mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil( ++ mode_lib->vba.RoundedUpMaxSwathSizeBytesY, ++ 256.0) + 256; ++ } ++ if (locals->MaxSwathHeightC[k] > 0.0) { ++ mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k]; ++ mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC) ++ + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; ++ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { ++ mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256; ++ } ++ } else { ++ mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0; ++ } ++ if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC ++ <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { ++ locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k]; ++ locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k]; ++ } else { ++ locals->SwathHeightYThisState[k] = ++ locals->MinSwathHeightY[k]; ++ locals->SwathHeightCThisState[k] = ++ locals->MinSwathHeightC[k]; ++ } ++ } ++ ++ CalculateDCFCLKDeepSleep( ++ mode_lib, ++ mode_lib->vba.NumberOfActivePlanes, ++ locals->BytePerPixelInDETY, ++ locals->BytePerPixelInDETC, ++ mode_lib->vba.VRatio, ++ locals->SwathWidthYThisState, ++ locals->NoOfDPPThisState, ++ mode_lib->vba.HRatio, ++ mode_lib->vba.PixelClock, ++ locals->PSCL_FACTOR, ++ locals->PSCL_FACTOR_CHROMA, ++ locals->RequiredDPPCLKThisState, ++ &mode_lib->vba.ProjectedDCFCLKDeepSleep); ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { ++ 
mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ locals->Read256BlockHeightC[k], ++ locals->Read256BlockWidthC[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(locals->BytePerPixelInDETC[k], 2.0), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k] / 2.0, ++ mode_lib->vba.ViewportHeight[k] / 2.0, ++ locals->SwathWidthYThisState[k] / 2.0, ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels, ++ mode_lib->vba.VMMPageSize, ++ mode_lib->vba.PTEBufferSizeInRequestsChroma, ++ mode_lib->vba.PitchC[k], ++ 0.0, ++ &locals->MacroTileWidthC[k], ++ &mode_lib->vba.MetaRowBytesC, ++ &mode_lib->vba.DPTEBytesPerRowC, ++ &locals->PTEBufferSizeNotExceededC[i][j][k], ++ locals->dpte_row_width_chroma_ub, ++ &locals->dpte_row_height_chroma[k], ++ &locals->meta_req_width_chroma[k], ++ &locals->meta_req_height_chroma[k], ++ &locals->meta_row_width_chroma[k], ++ &locals->meta_row_height_chroma[k], ++ &locals->vm_group_bytes_chroma, ++ &locals->dpte_group_bytes_chroma, ++ locals->PixelPTEReqWidthC, ++ locals->PixelPTEReqHeightC, ++ locals->PTERequestSizeC, ++ locals->dpde0_bytes_per_frame_ub_c, ++ locals->meta_pte_bytes_per_frame_ub_c); ++ locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k]/2, ++ mode_lib->vba.VTAPsChroma[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ locals->SwathHeightCThisState[k], ++ mode_lib->vba.ViewportYStartC[k], ++ &locals->PrefillC[k], ++ &locals->MaxNumSwC[k]); ++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma; ++ } else { ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; ++ mode_lib->vba.MetaRowBytesC = 0.0; ++ mode_lib->vba.DPTEBytesPerRowC = 0.0; ++ locals->PrefetchLinesC[k] = 0.0; ++ 
locals->PTEBufferSizeNotExceededC[i][j][k] = true; ++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; ++ } ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( ++ mode_lib, ++ mode_lib->vba.DCCEnable[k], ++ locals->Read256BlockHeightY[k], ++ locals->Read256BlockWidthY[k], ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.SurfaceTiling[k], ++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0), ++ mode_lib->vba.SourceScan[k], ++ mode_lib->vba.ViewportWidth[k], ++ mode_lib->vba.ViewportHeight[k], ++ locals->SwathWidthYThisState[k], ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels, ++ mode_lib->vba.VMMPageSize, ++ locals->PTEBufferSizeInRequestsForLuma, ++ mode_lib->vba.PitchY[k], ++ mode_lib->vba.DCCMetaPitchY[k], ++ &locals->MacroTileWidthY[k], ++ &mode_lib->vba.MetaRowBytesY, ++ &mode_lib->vba.DPTEBytesPerRowY, ++ &locals->PTEBufferSizeNotExceededY[i][j][k], ++ locals->dpte_row_width_luma_ub, ++ &locals->dpte_row_height[k], ++ &locals->meta_req_width[k], ++ &locals->meta_req_height[k], ++ &locals->meta_row_width[k], ++ &locals->meta_row_height[k], ++ &locals->vm_group_bytes[k], ++ &locals->dpte_group_bytes[k], ++ locals->PixelPTEReqWidthY, ++ locals->PixelPTEReqHeightY, ++ locals->PTERequestSizeY, ++ locals->dpde0_bytes_per_frame_ub_l, ++ locals->meta_pte_bytes_per_frame_ub_l); ++ locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.vtaps[k], ++ mode_lib->vba.Interlace[k], ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ locals->SwathHeightYThisState[k], ++ mode_lib->vba.ViewportYStartY[k], ++ &locals->PrefillY[k], ++ &locals->MaxNumSwY[k]); ++ locals->PDEAndMetaPTEBytesPerFrame[k] = ++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; ++ locals->MetaRowBytes[k] = 
mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; ++ locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; ++ ++ CalculateActiveRowBandwidth( ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.VRatio[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.HTotal[k] / ++ mode_lib->vba.PixelClock[k], ++ mode_lib->vba.MetaRowBytesY, ++ mode_lib->vba.MetaRowBytesC, ++ locals->meta_row_height[k], ++ locals->meta_row_height_chroma[k], ++ mode_lib->vba.DPTEBytesPerRowY, ++ mode_lib->vba.DPTEBytesPerRowC, ++ locals->dpte_row_height[k], ++ locals->dpte_row_height_chroma[k], ++ &locals->meta_row_bw[k], ++ &locals->dpte_row_bw[k]); ++ } ++ mode_lib->vba.ExtraLatency = CalculateExtraLatency( ++ locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i], ++ locals->TotalNumberOfActiveDPP[i][j], ++ mode_lib->vba.PixelChunkSizeInKByte, ++ locals->TotalNumberOfDCCActiveDPP[i][j], ++ mode_lib->vba.MetaChunkSize, ++ locals->ReturnBWPerState[i], ++ mode_lib->vba.GPUVMEnable, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.NumberOfActivePlanes, ++ locals->NoOfDPPThisState, ++ locals->dpte_group_bytes, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels); ++ ++ mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ if (mode_lib->vba.WritebackEnable[k] == true) { ++ locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency ++ + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[k], ++ mode_lib->vba.WritebackHRatio[k], ++ mode_lib->vba.WritebackVRatio[k], ++ mode_lib->vba.WritebackLumaHTaps[k], ++ mode_lib->vba.WritebackLumaVTaps[k], ++ 
mode_lib->vba.WritebackChromaHTaps[k], ++ mode_lib->vba.WritebackChromaVTaps[k], ++ mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; ++ } else { ++ locals->WritebackDelay[i][k] = 0.0; ++ } ++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { ++ if (mode_lib->vba.BlendingAndTiming[m] == k ++ && mode_lib->vba.WritebackEnable[m] ++ == true) { ++ locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], ++ mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( ++ mode_lib->vba.WritebackPixelFormat[m], ++ mode_lib->vba.WritebackHRatio[m], ++ mode_lib->vba.WritebackVRatio[m], ++ mode_lib->vba.WritebackLumaHTaps[m], ++ mode_lib->vba.WritebackLumaVTaps[m], ++ mode_lib->vba.WritebackChromaHTaps[m], ++ mode_lib->vba.WritebackChromaVTaps[m], ++ mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); ++ } ++ } ++ } ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == m) { ++ locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; ++ } ++ } ++ } ++ mode_lib->vba.MaxMaxVStartup = 0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] ++ - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); ++ mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]); ++ } ++ ++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; ++ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; ++ do { ++ mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; ++ mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; ++ ++ mode_lib->vba.TWait = CalculateTWait( ++ mode_lib->vba.PrefetchMode[i][j], ++ mode_lib->vba.DRAMClockChangeLatency, ++ 
mode_lib->vba.UrgentLatency, ++ mode_lib->vba.SREnterPlusExitTime); ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ Pipe myPipe; ++ HostVM myHostVM; ++ ++ if (mode_lib->vba.XFCEnabled[k] == true) { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = ++ CalculateRemoteSurfaceFlipDelay( ++ mode_lib, ++ mode_lib->vba.VRatio[k], ++ locals->SwathWidthYThisState[k], ++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0), ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.XFCTSlvVupdateOffset, ++ mode_lib->vba.XFCTSlvVupdateWidth, ++ mode_lib->vba.XFCTSlvVreadyOffset, ++ mode_lib->vba.XFCXBUFLatencyTolerance, ++ mode_lib->vba.XFCFillBWOverhead, ++ mode_lib->vba.XFCSlvChunkSize, ++ mode_lib->vba.XFCBusTransportTime, ++ mode_lib->vba.TimeCalc, ++ mode_lib->vba.TWait, ++ &mode_lib->vba.SrcActiveDrainRate, ++ &mode_lib->vba.TInitXFill, ++ &mode_lib->vba.TslvChk); ++ } else { ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; ++ } ++ ++ myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k]; ++ myPipe.DISPCLK = locals->RequiredDISPCLK[i][j]; ++ myPipe.PixelClock = mode_lib->vba.PixelClock[k]; ++ myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep; ++ myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k]; ++ myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; ++ myPipe.SourceScan = mode_lib->vba.SourceScan[k]; ++ myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k]; ++ myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k]; ++ myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k]; ++ myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k]; ++ myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; ++ myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; ++ myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; ++ myPipe.HTotal = mode_lib->vba.HTotal[k]; ++ ++ ++ myHostVM.Enable = mode_lib->vba.HostVMEnable; ++ myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; ++ 
myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; ++ ++ ++ mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule( ++ mode_lib, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ &myPipe, ++ locals->DSCDelayPerState[i][k], ++ mode_lib->vba.DPPCLKDelaySubtotal, ++ mode_lib->vba.DPPCLKDelaySCL, ++ mode_lib->vba.DPPCLKDelaySCLLBOnly, ++ mode_lib->vba.DPPCLKDelayCNVCFormater, ++ mode_lib->vba.DPPCLKDelayCNVCCursor, ++ mode_lib->vba.DISPCLKDelaySubtotal, ++ locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k], ++ mode_lib->vba.OutputFormat[k], ++ mode_lib->vba.MaxInterDCNTileRepeaters, ++ dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]), ++ locals->MaximumVStartup[k], ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ &myHostVM, ++ mode_lib->vba.DynamicMetadataEnable[k], ++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], ++ mode_lib->vba.DynamicMetadataTransmittedBytes[k], ++ mode_lib->vba.DCCEnable[k], ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.ExtraLatency, ++ mode_lib->vba.TimeCalc, ++ locals->PDEAndMetaPTEBytesPerFrame[k], ++ locals->MetaRowBytes[k], ++ locals->DPTEBytesPerRow[k], ++ locals->PrefetchLinesY[k], ++ locals->SwathWidthYThisState[k], ++ locals->BytePerPixelInDETY[k], ++ locals->PrefillY[k], ++ locals->MaxNumSwY[k], ++ locals->PrefetchLinesC[k], ++ locals->BytePerPixelInDETC[k], ++ locals->PrefillC[k], ++ locals->MaxNumSwC[k], ++ locals->SwathHeightYThisState[k], ++ locals->SwathHeightCThisState[k], ++ mode_lib->vba.TWait, ++ mode_lib->vba.XFCEnabled[k], ++ mode_lib->vba.XFCRemoteSurfaceFlipDelay, ++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, ++ &locals->dst_x_after_scaler, ++ &locals->dst_y_after_scaler, ++ &locals->LineTimesForPrefetch[k], ++ &locals->PrefetchBW[k], ++ &locals->LinesForMetaPTE[k], ++ 
&locals->LinesForMetaAndDPTERow[k], ++ &locals->VRatioPreY[i][j][k], ++ &locals->VRatioPreC[i][j][k], ++ &locals->RequiredPrefetchPixelDataBWLuma[i][j][k], ++ &locals->RequiredPrefetchPixelDataBWChroma[i][j][k], ++ &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, ++ &locals->Tno_bw[k], ++ &locals->prefetch_vmrow_bw[k], ++ locals->swath_width_luma_ub, ++ locals->swath_width_chroma_ub, ++ &mode_lib->vba.VUpdateOffsetPix[k], ++ &mode_lib->vba.VUpdateWidthPix[k], ++ &mode_lib->vba.VReadyOffsetPix[k]); ++ } ++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; ++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ uint m; ++ ++ locals->cursor_bw[k] = 0; ++ locals->cursor_bw_pre[k] = 0; ++ for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { ++ locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] ++ / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; ++ locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] ++ / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k]; ++ } ++ ++ CalculateUrgentBurstFactor( ++ mode_lib->vba.DETBufferSizeInKByte, ++ locals->SwathHeightYThisState[k], ++ locals->SwathHeightCThisState[k], ++ locals->SwathWidthYThisState[k], ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.CursorBufferSize, ++ mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], ++ dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), ++ mode_lib->vba.VRatio[k], ++ locals->VRatioPreY[i][j][k], ++ locals->VRatioPreC[i][j][k], ++ locals->BytePerPixelInDETY[k], ++ locals->BytePerPixelInDETC[k], ++ &locals->UrgentBurstFactorCursor[k], ++ &locals->UrgentBurstFactorCursorPre[k], ++ &locals->UrgentBurstFactorLuma[k], ++ &locals->UrgentBurstFactorLumaPre[k], ++ 
&locals->UrgentBurstFactorChroma[k], ++ &locals->UrgentBurstFactorChromaPre[k], ++ &locals->NotEnoughUrgentLatencyHiding, ++ &locals->NotEnoughUrgentLatencyHidingPre); ++ ++ if (mode_lib->vba.UseUrgentBurstBandwidth == false) { ++ locals->UrgentBurstFactorCursor[k] = 1; ++ locals->UrgentBurstFactorCursorPre[k] = 1; ++ locals->UrgentBurstFactorLuma[k] = 1; ++ locals->UrgentBurstFactorLumaPre[k] = 1; ++ locals->UrgentBurstFactorChroma[k] = 1; ++ locals->UrgentBurstFactorChromaPre[k] = 1; ++ } ++ ++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch ++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k] ++ * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k] ++ * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k]; ++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch ++ + dml_max3(locals->prefetch_vmrow_bw[k], ++ locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k] ++ * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] ++ + locals->meta_row_bw[k] + locals->dpte_row_bw[k], ++ locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k] ++ + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] ++ + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); ++ } ++ locals->BandwidthWithoutPrefetchSupported[i] = true; ++ if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i] ++ || locals->NotEnoughUrgentLatencyHiding == 1) { ++ locals->BandwidthWithoutPrefetchSupported[i] = false; ++ } ++ ++ locals->PrefetchSupported[i][j] = true; ++ if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i] ++ || locals->NotEnoughUrgentLatencyHiding == 1 ++ || locals->NotEnoughUrgentLatencyHidingPre 
== 1) { ++ locals->PrefetchSupported[i][j] = false; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->LineTimesForPrefetch[k] < 2.0 ++ || locals->LinesForMetaPTE[k] >= 32.0 ++ || locals->LinesForMetaAndDPTERow[k] >= 16.0 ++ || mode_lib->vba.IsErrorResult[i][j][k] == true) { ++ locals->PrefetchSupported[i][j] = false; ++ } ++ } ++ locals->VRatioInPrefetchSupported[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->VRatioPreY[i][j][k] > 4.0 ++ || locals->VRatioPreC[i][j][k] > 4.0 ++ || mode_lib->vba.IsErrorResult[i][j][k] == true) { ++ locals->VRatioInPrefetchSupported[i][j] = false; ++ } ++ } ++ mode_lib->vba.AnyLinesForVMOrRowTooLarge = false; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) { ++ mode_lib->vba.AnyLinesForVMOrRowTooLarge = true; ++ } ++ } ++ ++ if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { ++ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; ++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; ++ } else { ++ mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; ++ } ++ } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) ++ && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup ++ || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); ++ ++ if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i]; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip ++ - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] ++ + locals->ReadBandwidthChroma[k] * 
locals->UrgentBurstFactorChroma[k] ++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], ++ locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k] ++ + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] ++ + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); ++ } ++ mode_lib->vba.TotImmediateFlipBytes = 0.0; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes ++ + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]; ++ } ++ ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ CalculateFlipSchedule( ++ mode_lib, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ mode_lib->vba.ExtraLatency, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.GPUVMMaxPageTableLevels, ++ mode_lib->vba.HostVMEnable, ++ mode_lib->vba.HostVMMaxPageTableLevels, ++ mode_lib->vba.HostVMCachedPageTableLevels, ++ mode_lib->vba.GPUVMEnable, ++ locals->PDEAndMetaPTEBytesPerFrame[k], ++ locals->MetaRowBytes[k], ++ locals->DPTEBytesPerRow[k], ++ mode_lib->vba.BandwidthAvailableForImmediateFlip, ++ mode_lib->vba.TotImmediateFlipBytes, ++ mode_lib->vba.SourcePixelFormat[k], ++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], ++ mode_lib->vba.VRatio[k], ++ locals->Tno_bw[k], ++ mode_lib->vba.DCCEnable[k], ++ locals->dpte_row_height[k], ++ locals->meta_row_height[k], ++ locals->dpte_row_height_chroma[k], ++ locals->meta_row_height_chroma[k], ++ &locals->DestinationLinesToRequestVMInImmediateFlip[k], ++ &locals->DestinationLinesToRequestRowInImmediateFlip[k], ++ &locals->final_flip_bw[k], ++ &locals->ImmediateFlipSupportedForPipe[k]); ++ } ++ mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; ++ for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( ++ locals->prefetch_vmrow_bw[k], ++ locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] ++ + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] ++ + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], ++ locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] ++ * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] ++ * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] ++ * locals->UrgentBurstFactorCursorPre[k]); ++ } ++ locals->ImmediateFlipSupportedForState[i][j] = true; ++ if (mode_lib->vba.total_dcn_read_bw_with_flip ++ > locals->ReturnBWPerState[i]) { ++ locals->ImmediateFlipSupportedForState[i][j] = false; ++ } ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->ImmediateFlipSupportedForPipe[k] == false) { ++ locals->ImmediateFlipSupportedForState[i][j] = false; ++ } ++ } ++ } else { ++ locals->ImmediateFlipSupportedForState[i][j] = false; ++ } ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); ++ CalculateWatermarksAndDRAMSpeedChangeSupport( ++ mode_lib, ++ mode_lib->vba.PrefetchMode[i][j], ++ mode_lib->vba.NumberOfActivePlanes, ++ mode_lib->vba.MaxLineBufferLines, ++ mode_lib->vba.LineBufferSize, ++ mode_lib->vba.DPPOutputBufferPixels, ++ mode_lib->vba.DETBufferSizeInKByte, ++ mode_lib->vba.WritebackInterfaceLumaBufferSize, ++ mode_lib->vba.WritebackInterfaceChromaBufferSize, ++ mode_lib->vba.DCFCLKPerState[i], ++ mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, ++ locals->ReturnBWPerState[i], ++ 
mode_lib->vba.GPUVMEnable, ++ locals->dpte_group_bytes, ++ mode_lib->vba.MetaChunkSize, ++ mode_lib->vba.UrgentLatency, ++ mode_lib->vba.ExtraLatency, ++ mode_lib->vba.WritebackLatency, ++ mode_lib->vba.WritebackChunkSize, ++ mode_lib->vba.SOCCLKPerState[i], ++ mode_lib->vba.DRAMClockChangeLatency, ++ mode_lib->vba.SRExitTime, ++ mode_lib->vba.SREnterPlusExitTime, ++ mode_lib->vba.ProjectedDCFCLKDeepSleep, ++ locals->NoOfDPPThisState, ++ mode_lib->vba.DCCEnable, ++ locals->RequiredDPPCLKThisState, ++ locals->SwathWidthYSingleDPP, ++ locals->SwathHeightYThisState, ++ locals->ReadBandwidthLuma, ++ locals->SwathHeightCThisState, ++ locals->ReadBandwidthChroma, ++ mode_lib->vba.LBBitPerPixel, ++ locals->SwathWidthYThisState, ++ mode_lib->vba.HRatio, ++ mode_lib->vba.vtaps, ++ mode_lib->vba.VTAPsChroma, ++ mode_lib->vba.VRatio, ++ mode_lib->vba.HTotal, ++ mode_lib->vba.PixelClock, ++ mode_lib->vba.BlendingAndTiming, ++ locals->BytePerPixelInDETY, ++ locals->BytePerPixelInDETC, ++ mode_lib->vba.WritebackEnable, ++ mode_lib->vba.WritebackPixelFormat, ++ mode_lib->vba.WritebackDestinationWidth, ++ mode_lib->vba.WritebackDestinationHeight, ++ mode_lib->vba.WritebackSourceHeight, ++ &locals->DRAMClockChangeSupport[i][j], ++ &mode_lib->vba.UrgentWatermark, ++ &mode_lib->vba.WritebackUrgentWatermark, ++ &mode_lib->vba.DRAMClockChangeWatermark, ++ &mode_lib->vba.WritebackDRAMClockChangeWatermark, ++ &mode_lib->vba.StutterExitWatermark, ++ &mode_lib->vba.StutterEnterPlusExitWatermark, ++ &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); ++ } ++ } ++ ++ /*Vertical Active BW support*/ ++ { ++ double MaxTotalVActiveRDBandwidth = 0.0; ++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { ++ MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; ++ } ++ for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { ++ locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min( ++ locals->IdealSDPPortBandwidthPerState[i] * ++ 
mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation ++ / 100.0, mode_lib->vba.DRAMSpeedPerState[i] * ++ mode_lib->vba.NumberOfChannels * ++ mode_lib->vba.DRAMChannelWidth * ++ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation ++ / 100.0); ++ ++ if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) { ++ locals->TotalVerticalActiveBandwidthSupport[i] = true; ++ } else { ++ locals->TotalVerticalActiveBandwidthSupport[i] = false; ++ } ++ } ++ } ++ ++ /*PTE Buffer Size Check*/ ++ ++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { ++ for (j = 0; j < 2; j++) { ++ locals->PTEBufferSizeNotExceeded[i][j] = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (locals->PTEBufferSizeNotExceededY[i][j][k] == false ++ || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { ++ locals->PTEBufferSizeNotExceeded[i][j] = false; ++ } ++ } ++ } ++ } ++ /*Cursor Support Check*/ ++ ++ mode_lib->vba.CursorSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.CursorWidth[k][0] > 0.0) { ++ for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { ++ if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) { ++ mode_lib->vba.CursorSupport = false; ++ } ++ } ++ } ++ } ++ /*Valid Pitch Check*/ ++ ++ mode_lib->vba.PitchSupport = true; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ locals->AlignedYPitch[k] = dml_ceil( ++ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), ++ locals->MacroTileWidthY[k]); ++ if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { ++ mode_lib->vba.PitchSupport = false; ++ } ++ if (mode_lib->vba.DCCEnable[k] == true) { ++ locals->AlignedDCCMetaPitch[k] = dml_ceil( ++ dml_max( ++ mode_lib->vba.DCCMetaPitchY[k], ++ mode_lib->vba.ViewportWidth[k]), ++ 64.0 * locals->Read256BlockWidthY[k]); ++ } else { ++ 
locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; ++ } ++ if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { ++ mode_lib->vba.PitchSupport = false; ++ } ++ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 ++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { ++ locals->AlignedCPitch[k] = dml_ceil( ++ dml_max( ++ mode_lib->vba.PitchC[k], ++ mode_lib->vba.ViewportWidth[k] / 2.0), ++ locals->MacroTileWidthC[k]); ++ } else { ++ locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; ++ } ++ if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { ++ mode_lib->vba.PitchSupport = false; ++ } ++ } ++ /*Mode Support, Voltage State and SOC Configuration*/ ++ ++ for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { ++ for (j = 0; j < 2; j++) { ++ enum dm_validation_status status = DML_VALIDATION_OK; ++ ++ if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { ++ status = DML_FAIL_SCALE_RATIO_TAP; ++ } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { ++ status = DML_FAIL_SOURCE_PIXEL_FORMAT; ++ } else if (locals->ViewportSizeSupport[i] != true) { ++ status = DML_FAIL_VIEWPORT_SIZE; ++ } else if (locals->DIOSupport[i] != true) { ++ status = DML_FAIL_DIO_SUPPORT; ++ } else if (locals->NotEnoughDSCUnits[i] != false) { ++ status = DML_FAIL_NOT_ENOUGH_DSC; ++ } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { ++ status = DML_FAIL_DSC_CLK_REQUIRED; ++ } else if (locals->ROBSupport[i] != true) { ++ status = DML_FAIL_REORDERING_BUFFER; ++ } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { ++ status = DML_FAIL_DISPCLK_DPPCLK; ++ } else if (locals->TotalAvailablePipesSupport[i][j] != true) { ++ status = DML_FAIL_TOTAL_AVAILABLE_PIPES; ++ } else if (mode_lib->vba.NumberOfOTGSupport != true) { ++ status = DML_FAIL_NUM_OTG; ++ } else if 
(mode_lib->vba.WritebackModeSupport != true) { ++ status = DML_FAIL_WRITEBACK_MODE; ++ } else if (mode_lib->vba.WritebackLatencySupport != true) { ++ status = DML_FAIL_WRITEBACK_LATENCY; ++ } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { ++ status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; ++ } else if (mode_lib->vba.CursorSupport != true) { ++ status = DML_FAIL_CURSOR_SUPPORT; ++ } else if (mode_lib->vba.PitchSupport != true) { ++ status = DML_FAIL_PITCH_SUPPORT; ++ } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { ++ status = DML_FAIL_TOTAL_V_ACTIVE_BW; ++ } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { ++ status = DML_FAIL_PTE_BUFFER_SIZE; ++ } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { ++ status = DML_FAIL_DSC_INPUT_BPC; ++ } else if ((mode_lib->vba.HostVMEnable != false ++ && locals->ImmediateFlipSupportedForState[i][j] != true)) { ++ status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP; ++ } else if (locals->PrefetchSupported[i][j] != true) { ++ status = DML_FAIL_PREFETCH_SUPPORT; ++ } else if (locals->VRatioInPrefetchSupported[i][j] != true) { ++ status = DML_FAIL_V_RATIO_PREFETCH; ++ } ++ ++ if (status == DML_VALIDATION_OK) { ++ locals->ModeSupport[i][j] = true; ++ } else { ++ locals->ModeSupport[i][j] = false; ++ } ++ locals->ValidationStatus[i] = status; ++ } ++ } ++ { ++ unsigned int MaximumMPCCombine = 0; ++ mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; ++ for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { ++ if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { ++ mode_lib->vba.VoltageLevel = i; ++ if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false ++ || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible ++ || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks ++ && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive ++ && 
locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive) ++ || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank ++ && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) { ++ MaximumMPCCombine = 1; ++ } else { ++ MaximumMPCCombine = 0; ++ } ++ break; ++ } ++ } ++ mode_lib->vba.ImmediateFlipSupport = ++ locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; ++ locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; ++ } ++ mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; ++ mode_lib->vba.maxMpcComb = MaximumMPCCombine; ++ } ++ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; ++ mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; ++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { ++ if (mode_lib->vba.BlendingAndTiming[k] == k) { ++ mode_lib->vba.ODMCombineEnabled[k] = ++ locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; ++ } else { ++ mode_lib->vba.ODMCombineEnabled[k] = 0; ++ } ++ mode_lib->vba.DSCEnabled[k] = ++ locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; ++ mode_lib->vba.OutputBpp[k] = ++ locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; ++ } ++} ++ ++static void CalculateWatermarksAndDRAMSpeedChangeSupport( ++ struct display_mode_lib *mode_lib, ++ unsigned int PrefetchMode, ++ unsigned int NumberOfActivePlanes, ++ unsigned int MaxLineBufferLines, ++ unsigned 
int LineBufferSize, ++ unsigned int DPPOutputBufferPixels, ++ double DETBufferSizeInKByte, ++ unsigned int WritebackInterfaceLumaBufferSize, ++ unsigned int WritebackInterfaceChromaBufferSize, ++ double DCFCLK, ++ double UrgentOutOfOrderReturn, ++ double ReturnBW, ++ bool GPUVMEnable, ++ long dpte_group_bytes[], ++ unsigned int MetaChunkSize, ++ double UrgentLatency, ++ double ExtraLatency, ++ double WritebackLatency, ++ double WritebackChunkSize, ++ double SOCCLK, ++ double DRAMClockChangeLatency, ++ double SRExitTime, ++ double SREnterPlusExitTime, ++ double DCFCLKDeepSleep, ++ int DPPPerPlane[], ++ bool DCCEnable[], ++ double DPPCLK[], ++ unsigned int SwathWidthSingleDPPY[], ++ unsigned int SwathHeightY[], ++ double ReadBandwidthPlaneLuma[], ++ unsigned int SwathHeightC[], ++ double ReadBandwidthPlaneChroma[], ++ unsigned int LBBitPerPixel[], ++ unsigned int SwathWidthY[], ++ double HRatio[], ++ unsigned int vtaps[], ++ unsigned int VTAPsChroma[], ++ double VRatio[], ++ unsigned int HTotal[], ++ double PixelClock[], ++ unsigned int BlendingAndTiming[], ++ double BytePerPixelDETY[], ++ double BytePerPixelDETC[], ++ bool WritebackEnable[], ++ enum source_format_class WritebackPixelFormat[], ++ double WritebackDestinationWidth[], ++ double WritebackDestinationHeight[], ++ double WritebackSourceHeight[], ++ enum clock_change_support *DRAMClockChangeSupport, ++ double *UrgentWatermark, ++ double *WritebackUrgentWatermark, ++ double *DRAMClockChangeWatermark, ++ double *WritebackDRAMClockChangeWatermark, ++ double *StutterExitWatermark, ++ double *StutterEnterPlusExitWatermark, ++ double *MinActiveDRAMClockChangeLatencySupported) ++{ ++ double EffectiveLBLatencyHidingY; ++ double EffectiveLBLatencyHidingC; ++ double DPPOutputBufferLinesY; ++ double DPPOutputBufferLinesC; ++ double DETBufferSizeY; ++ double DETBufferSizeC; ++ double LinesInDETY[DC__NUM_DPP__MAX]; ++ double LinesInDETC; ++ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; ++ unsigned int 
LinesInDETCRoundedDownToSwath; ++ double FullDETBufferingTimeY[DC__NUM_DPP__MAX]; ++ double FullDETBufferingTimeC; ++ double ActiveDRAMClockChangeLatencyMarginY; ++ double ActiveDRAMClockChangeLatencyMarginC; ++ double WritebackDRAMClockChangeLatencyMargin; ++ double PlaneWithMinActiveDRAMClockChangeMargin; ++ double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; ++ double FullDETBufferingTimeYStutterCriticalPlane = 0; ++ double TimeToFinishSwathTransferStutterCriticalPlane = 0; ++ uint k, j; ++ ++ mode_lib->vba.TotalActiveDPP = 0; ++ mode_lib->vba.TotalDCCActiveDPP = 0; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k]; ++ if (DCCEnable[k] == true) { ++ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k]; ++ } ++ } ++ ++ mode_lib->vba.TotalDataReadBandwidth = 0; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth ++ + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; ++ } ++ ++ *UrgentWatermark = UrgentLatency + ExtraLatency; ++ ++ *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; ++ ++ mode_lib->vba.TotalActiveWriteback = 0; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (WritebackEnable[k] == true) { ++ mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; ++ } ++ } ++ ++ if (mode_lib->vba.TotalActiveWriteback <= 1) { ++ *WritebackUrgentWatermark = WritebackLatency; ++ } else { ++ *WritebackUrgentWatermark = WritebackLatency ++ + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; ++ } ++ ++ if (mode_lib->vba.TotalActiveWriteback <= 1) { ++ *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; ++ } else { ++ *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency ++ + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; ++ } ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ ++ 
mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, ++ dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) ++ - (vtaps[k] - 1); ++ ++ mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, ++ dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1)) ++ - (VTAPsChroma[k] - 1); ++ ++ EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] ++ * (HTotal[k] / PixelClock[k]); ++ ++ EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC ++ / (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]); ++ ++ if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) { ++ DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k]; ++ } else if (SwathWidthY[k] > DPPOutputBufferPixels) { ++ DPPOutputBufferLinesY = 0.5; ++ } else { ++ DPPOutputBufferLinesY = 1; ++ } ++ ++ if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) { ++ DPPOutputBufferLinesC = (double) DPPOutputBufferPixels ++ / (SwathWidthY[k] / 2.0); ++ } else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) { ++ DPPOutputBufferLinesC = 0.5; ++ } else { ++ DPPOutputBufferLinesC = 1; ++ } ++ ++ CalculateDETBufferSize( ++ DETBufferSizeInKByte, ++ SwathHeightY[k], ++ SwathHeightC[k], ++ &DETBufferSizeY, ++ &DETBufferSizeC); ++ ++ LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; ++ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); ++ FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] ++ * (HTotal[k] / PixelClock[k]) / VRatio[k]; ++ if (BytePerPixelDETC[k] > 0) { ++ LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0); ++ LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); ++ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath ++ * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2); ++ } else { ++ LinesInDETC = 0; ++ FullDETBufferingTimeC = 999999; ++ 
} ++ ++ ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k] ++ * DPPOutputBufferLinesY + EffectiveLBLatencyHidingY ++ + FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark; ++ ++ if (NumberOfActivePlanes > 1) { ++ ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY ++ - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; ++ } ++ ++ if (BytePerPixelDETC[k] > 0) { ++ ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k] ++ * DPPOutputBufferLinesC + EffectiveLBLatencyHidingC ++ + FullDETBufferingTimeC - *DRAMClockChangeWatermark; ++ if (NumberOfActivePlanes > 1) { ++ ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC ++ - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2); ++ } ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( ++ ActiveDRAMClockChangeLatencyMarginY, ++ ActiveDRAMClockChangeLatencyMarginC); ++ } else { ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; ++ } ++ ++ if (WritebackEnable[k] == true) { ++ if (WritebackPixelFormat[k] == dm_444_32) { ++ WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize ++ + WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k] ++ * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] ++ / PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark; ++ } else { ++ WritebackDRAMClockChangeLatencyMargin = dml_min( ++ WritebackInterfaceLumaBufferSize * 8.0 / 10, ++ 2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k] ++ * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k])) ++ - *WritebackDRAMClockChangeWatermark; ++ } ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], ++ WritebackDRAMClockChangeLatencyMargin); ++ } ++ } ++ ++ 
mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; ++ PlaneWithMinActiveDRAMClockChangeMargin = 0; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] ++ < mode_lib->vba.MinActiveDRAMClockChangeMargin) { ++ mode_lib->vba.MinActiveDRAMClockChangeMargin = ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; ++ if (BlendingAndTiming[k] == k) { ++ PlaneWithMinActiveDRAMClockChangeMargin = k; ++ } else { ++ for (j = 0; j < NumberOfActivePlanes; ++j) { ++ if (BlendingAndTiming[k] == j) { ++ PlaneWithMinActiveDRAMClockChangeMargin = j; ++ } ++ } ++ } ++ } ++ } ++ ++ *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; ++ ++ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) ++ && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) ++ && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] ++ < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { ++ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = ++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; ++ } ++ } ++ ++ mode_lib->vba.TotalNumberOfActiveOTG = 0; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (BlendingAndTiming[k] == k) { ++ mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1; ++ } ++ } ++ ++ if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { ++ *DRAMClockChangeSupport = dm_dram_clock_change_vactive; ++ } else if (((mode_lib->vba.SynchronizedVBlank == true ++ || mode_lib->vba.TotalNumberOfActiveOTG == 1 ++ || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) ++ && PrefetchMode == 0)) { ++ *DRAMClockChangeSupport = dm_dram_clock_change_vblank; ++ } else { ++ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; ++ } ++ ++ 
FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];/* NOTE(review): FullDETBufferingTimeYStutterCriticalPlane is not updated inside the loop below, so every plane whose buffering time is <= plane 0's overwrites TimeToFinishSwathTransferStutterCriticalPlane and the last such plane wins - confirm this is intended rather than a running minimum. */ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) { ++ TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] ++ - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) ++ * (HTotal[k] / PixelClock[k]) / VRatio[k]; ++ } ++ } ++ ++ *StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark ++ + ExtraLatency + 10 / DCFCLKDeepSleep; ++ *StutterEnterPlusExitWatermark = dml_max( ++ SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark ++ + ExtraLatency + 10 / DCFCLKDeepSleep, ++ TimeToFinishSwathTransferStutterCriticalPlane); ++ ++} ++ ++/* Computes *DCFCLKDeepSleep as the largest per-plane requirement, with a floor of 8. Each plane's requirement is derived from 1.1 * swath width * bytes per pixel divided by its line delivery time (luma and, when BytePerPixelDETC[k] > 0, chroma) and is never less than PixelClock[k] / 16; the per-plane values are kept in mode_lib->vba.DCFCLKDeepSleepPerPlane[]. The line delivery time uses the pixel-clock formula when the vertical ratio is <= 1 and the PSCL throughput and DPPCLK formula otherwise. */static void CalculateDCFCLKDeepSleep( ++ struct display_mode_lib *mode_lib, ++ unsigned int NumberOfActivePlanes, ++ double BytePerPixelDETY[], ++ double BytePerPixelDETC[], ++ double VRatio[], ++ unsigned int SwathWidthY[], ++ int DPPPerPlane[], ++ double HRatio[], ++ double PixelClock[], ++ double PSCL_THROUGHPUT[], ++ double PSCL_THROUGHPUT_CHROMA[], ++ double DPPCLK[], ++ double *DCFCLKDeepSleep) ++{ ++ uint k; ++ double DisplayPipeLineDeliveryTimeLuma; ++ double DisplayPipeLineDeliveryTimeChroma; ++ //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX]; ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (VRatio[k] <= 1) { ++ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] ++ / HRatio[k] / PixelClock[k]; ++ } else { ++ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] ++ / DPPCLK[k]; ++ } ++ if (BytePerPixelDETC[k] == 0) { ++ DisplayPipeLineDeliveryTimeChroma = 0; ++ } else { ++ if (VRatio[k] / 2 <= 1) { ++ DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0 ++ * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k]; ++ } else { ++ DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0 ++ / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; ++ } ++ } ++ ++ if (BytePerPixelDETC[k] > 0) { ++
mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( ++ 1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1) ++ / 32.0 / DisplayPipeLineDeliveryTimeLuma, ++ 1.1 * SwathWidthY[k] / 2.0 ++ * dml_ceil(BytePerPixelDETC[k], 2) / 32.0 ++ / DisplayPipeLineDeliveryTimeChroma); ++ } else { ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] ++ * dml_ceil(BytePerPixelDETY[k], 1) / 64.0 ++ / DisplayPipeLineDeliveryTimeLuma; ++ } ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k], ++ PixelClock[k] / 16); ++ ++ } ++ ++ *DCFCLKDeepSleep = 8; ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ *DCFCLKDeepSleep = dml_max( ++ *DCFCLKDeepSleep, ++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); ++ } ++} ++ ++/* Splits the DET buffer, given in KByte, into luma and chroma shares expressed in bytes (KByte * 1024): everything goes to luma when SwathHeightC == 0, the buffer is split evenly when SwathHeightY <= SwathHeightC, and otherwise luma gets two thirds and chroma one third. */static void CalculateDETBufferSize( ++ double DETBufferSizeInKByte, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ double *DETBufferSizeY, ++ double *DETBufferSizeC) ++{ ++ if (SwathHeightC == 0) { ++ *DETBufferSizeY = DETBufferSizeInKByte * 1024; ++ *DETBufferSizeC = 0; ++ } else if (SwathHeightY <= SwathHeightC) { ++ *DETBufferSizeY = DETBufferSizeInKByte * 1024 / 2; ++ *DETBufferSizeC = DETBufferSizeInKByte * 1024 / 2; ++ } else { ++ *DETBufferSizeY = DETBufferSizeInKByte * 1024 * 2 / 3; ++ *DETBufferSizeC = DETBufferSizeInKByte * 1024 / 3; ++ } ++} ++ ++/* Computes urgent burst factors for cursor, luma and chroma, each in an active and a prefetch (Pre) variant, as T / (T - UrgentLatency) where T is the time the corresponding buffer covers. When T - UrgentLatency <= 0 the factor is set to 0 and *NotEnoughUrgentLatencyHiding (or the Pre flag) is set to 1; a prefetch factor is 1 when its prefetch ratio is not > 0. NOTE(review): the cursor outputs are only written when CursorWidth > 0 and the chroma outputs only when BytePerPixelInDETC > 0 - callers presumably pre-initialise them; confirm. */static void CalculateUrgentBurstFactor( ++ unsigned int DETBufferSizeInKByte, ++ unsigned int SwathHeightY, ++ unsigned int SwathHeightC, ++ unsigned int SwathWidthY, ++ double LineTime, ++ double UrgentLatency, ++ double CursorBufferSize, ++ unsigned int CursorWidth, ++ unsigned int CursorBPP, ++ double VRatio, ++ double VRatioPreY, ++ double VRatioPreC, ++ double BytePerPixelInDETY, ++ double BytePerPixelInDETC, ++ double *UrgentBurstFactorCursor, ++ double *UrgentBurstFactorCursorPre, ++ double *UrgentBurstFactorLuma, ++ double *UrgentBurstFactorLumaPre, ++ double *UrgentBurstFactorChroma, ++ double *UrgentBurstFactorChromaPre, ++ unsigned int
*NotEnoughUrgentLatencyHiding, ++ unsigned int *NotEnoughUrgentLatencyHidingPre) ++{ ++ double LinesInDETLuma; ++ double LinesInDETChroma; ++ unsigned int LinesInCursorBuffer; ++ double CursorBufferSizeInTime; ++ double CursorBufferSizeInTimePre; ++ double DETBufferSizeInTimeLuma; ++ double DETBufferSizeInTimeLumaPre; ++ double DETBufferSizeInTimeChroma; ++ double DETBufferSizeInTimeChromaPre; ++ double DETBufferSizeY; ++ double DETBufferSizeC; ++ ++ *NotEnoughUrgentLatencyHiding = 0; ++ *NotEnoughUrgentLatencyHidingPre = 0; ++ ++ if (CursorWidth > 0) { ++ LinesInCursorBuffer = 1 << (unsigned int) dml_floor( ++ dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); ++ CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; ++ if (CursorBufferSizeInTime - UrgentLatency <= 0) { ++ *NotEnoughUrgentLatencyHiding = 1; ++ *UrgentBurstFactorCursor = 0; ++ } else { ++ *UrgentBurstFactorCursor = CursorBufferSizeInTime ++ / (CursorBufferSizeInTime - UrgentLatency); ++ } ++ if (VRatioPreY > 0) { ++ CursorBufferSizeInTimePre = LinesInCursorBuffer * LineTime / VRatioPreY; ++ if (CursorBufferSizeInTimePre - UrgentLatency <= 0) { ++ *NotEnoughUrgentLatencyHidingPre = 1; ++ *UrgentBurstFactorCursorPre = 0; ++ } else { ++ *UrgentBurstFactorCursorPre = CursorBufferSizeInTimePre ++ / (CursorBufferSizeInTimePre - UrgentLatency); ++ } ++ } else { ++ *UrgentBurstFactorCursorPre = 1; ++ } ++ } ++ ++ CalculateDETBufferSize( ++ DETBufferSizeInKByte, ++ SwathHeightY, ++ SwathHeightC, ++ &DETBufferSizeY, ++ &DETBufferSizeC); ++ ++ LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / SwathWidthY; ++ DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; ++ if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { ++ *NotEnoughUrgentLatencyHiding = 1; ++ *UrgentBurstFactorLuma = 0; ++ } else { ++ *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma ++ / (DETBufferSizeInTimeLuma - UrgentLatency); ++ } ++ if (VRatioPreY > 0) { ++ 
DETBufferSizeInTimeLumaPre = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime ++ / VRatioPreY; ++ if (DETBufferSizeInTimeLumaPre - UrgentLatency <= 0) { ++ *NotEnoughUrgentLatencyHidingPre = 1; ++ *UrgentBurstFactorLumaPre = 0; ++ } else { ++ *UrgentBurstFactorLumaPre = DETBufferSizeInTimeLumaPre ++ / (DETBufferSizeInTimeLumaPre - UrgentLatency); ++ } ++ } else { ++ *UrgentBurstFactorLumaPre = 1; ++ } ++ ++ if (BytePerPixelInDETC > 0) { ++ LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2);/* NOTE(review): SwathWidthY is an unsigned int parameter, so SwathWidthY / 2 is an integer division and truncates for odd widths - confirm intended. */ ++ DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime ++ / (VRatio / 2); ++ if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { ++ *NotEnoughUrgentLatencyHiding = 1; ++ *UrgentBurstFactorChroma = 0; ++ } else { ++ *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma ++ / (DETBufferSizeInTimeChroma - UrgentLatency); ++ } ++ if (VRatioPreC > 0) { ++ DETBufferSizeInTimeChromaPre = dml_floor(LinesInDETChroma, SwathHeightC) ++ * LineTime / VRatioPreC; ++ if (DETBufferSizeInTimeChromaPre - UrgentLatency <= 0) { ++ *NotEnoughUrgentLatencyHidingPre = 1; ++ *UrgentBurstFactorChromaPre = 0; ++ } else { ++ *UrgentBurstFactorChromaPre = DETBufferSizeInTimeChromaPre ++ / (DETBufferSizeInTimeChromaPre - UrgentLatency); ++ } ++ } else { ++ *UrgentBurstFactorChromaPre = 1; ++ } ++ } ++} ++ ++/* Fills, for each active plane, the line delivery times (luma and chroma, active and prefetch) and the per-request delivery times. A line delivery time uses swath width * DPPPerPlane / HRatio / PixelClock when the relevant vertical ratio is <= 1 and swath width / PSCL throughput / DPPCLK otherwise; chroma uses HRatio / 2 and VRatio / 2. A request delivery time is the line delivery time divided by the requests per swath (swath width divided by the 256-byte block width for dm_horz, by the block height otherwise). All chroma outputs are 0 when BytePerPixelDETC[k] == 0. NOTE(review): requests per swath is computed as an unsigned integer division before being stored in a double - confirm the truncation is intended. */static void CalculatePixelDeliveryTimes( ++ unsigned int NumberOfActivePlanes, ++ double VRatio[], ++ double VRatioPrefetchY[], ++ double VRatioPrefetchC[], ++ unsigned int swath_width_luma_ub[], ++ unsigned int swath_width_chroma_ub[], ++ int DPPPerPlane[], ++ double HRatio[], ++ double PixelClock[], ++ double PSCL_THROUGHPUT[], ++ double PSCL_THROUGHPUT_CHROMA[], ++ double DPPCLK[], ++ double BytePerPixelDETC[], ++ enum scan_direction_class SourceScan[], ++ unsigned int BlockWidth256BytesY[], ++ unsigned int BlockHeight256BytesY[], ++ unsigned int BlockWidth256BytesC[], ++ unsigned int BlockHeight256BytesC[], ++ double
DisplayPipeLineDeliveryTimeLuma[], ++ double DisplayPipeLineDeliveryTimeChroma[], ++ double DisplayPipeLineDeliveryTimeLumaPrefetch[], ++ double DisplayPipeLineDeliveryTimeChromaPrefetch[], ++ double DisplayPipeRequestDeliveryTimeLuma[], ++ double DisplayPipeRequestDeliveryTimeChroma[], ++ double DisplayPipeRequestDeliveryTimeLumaPrefetch[], ++ double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) ++{ ++ double req_per_swath_ub; ++ uint k; ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (VRatio[k] <= 1) { ++ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] ++ / HRatio[k] / PixelClock[k]; ++ } else { ++ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] ++ / PSCL_THROUGHPUT[k] / DPPCLK[k]; ++ } ++ ++ if (BytePerPixelDETC[k] == 0) { ++ DisplayPipeLineDeliveryTimeChroma[k] = 0; ++ } else { ++ if (VRatio[k] / 2 <= 1) { ++ DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] ++ * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k]; ++ } else { ++ DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] ++ / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; ++ } ++ } ++ ++ if (VRatioPrefetchY[k] <= 1) { ++ DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] ++ * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; ++ } else { ++ DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] ++ / PSCL_THROUGHPUT[k] / DPPCLK[k]; ++ } ++ ++ if (BytePerPixelDETC[k] == 0) { ++ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; ++ } else { ++ if (VRatioPrefetchC[k] <= 1) { ++ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = ++ swath_width_chroma_ub[k] * DPPPerPlane[k] ++ / (HRatio[k] / 2) / PixelClock[k]; ++ } else { ++ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = ++ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; ++ } ++ } ++ } ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (SourceScan[k] == dm_horz) { ++ req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; ++ } else { ++ 
req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; ++ } ++ DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] ++ / req_per_swath_ub; ++ DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = ++ DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; ++ if (BytePerPixelDETC[k] == 0) { ++ DisplayPipeRequestDeliveryTimeChroma[k] = 0; ++ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; ++ } else { ++ if (SourceScan[k] == dm_horz) { ++ req_per_swath_ub = swath_width_chroma_ub[k] ++ / BlockWidth256BytesC[k]; ++ } else { ++ req_per_swath_ub = swath_width_chroma_ub[k] ++ / BlockHeight256BytesC[k]; ++ } ++ DisplayPipeRequestDeliveryTimeChroma[k] = ++ DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; ++ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = ++ DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; ++ } ++ } ++} ++ ++/* Fills the per-plane timing outputs: DST_Y_PER_PTE_ROW_NOM_L and _C and DST_Y_PER_META_ROW_NOM_L, the time per meta chunk (0 when DCCEnable[k] is false), the time per PTE group for luma and chroma (0 when GPUVMEnable is false), and the time per VM group and per VM request (0 unless GPUVMEnable is set and either DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; halved when GPUVMMaxPageTableLevels > 2). NOTE(review): dpte_row_width_luma_ub[k] / dpte_group_width_luma and its chroma counterpart are unsigned integer divisions, so the dml_ceil(..., 1) wrapped around them receives an already truncated quotient - confirm intended. */static void CalculateMetaAndPTETimes( ++ unsigned int NumberOfActivePlanes, ++ bool GPUVMEnable, ++ unsigned int MetaChunkSize, ++ unsigned int MinMetaChunkSizeBytes, ++ unsigned int GPUVMMaxPageTableLevels, ++ unsigned int HTotal[], ++ double VRatio[], ++ double VRatioPrefetchY[], ++ double VRatioPrefetchC[], ++ double DestinationLinesToRequestRowInVBlank[], ++ double DestinationLinesToRequestRowInImmediateFlip[], ++ double DestinationLinesToRequestVMInVBlank[], ++ double DestinationLinesToRequestVMInImmediateFlip[], ++ bool DCCEnable[], ++ double PixelClock[], ++ double BytePerPixelDETY[], ++ double BytePerPixelDETC[], ++ enum scan_direction_class SourceScan[], ++ unsigned int dpte_row_height[], ++ unsigned int dpte_row_height_chroma[], ++ unsigned int meta_row_width[], ++ unsigned int meta_row_height[], ++ unsigned int meta_req_width[], ++ unsigned int meta_req_height[], ++ long dpte_group_bytes[], ++ unsigned int PTERequestSizeY[], ++ unsigned int PTERequestSizeC[], ++ unsigned int PixelPTEReqWidthY[], ++ unsigned int PixelPTEReqHeightY[], ++ unsigned int PixelPTEReqWidthC[], ++
unsigned int PixelPTEReqHeightC[], ++ unsigned int dpte_row_width_luma_ub[], ++ unsigned int dpte_row_width_chroma_ub[], ++ unsigned int vm_group_bytes[], ++ unsigned int dpde0_bytes_per_frame_ub_l[], ++ unsigned int dpde0_bytes_per_frame_ub_c[], ++ unsigned int meta_pte_bytes_per_frame_ub_l[], ++ unsigned int meta_pte_bytes_per_frame_ub_c[], ++ double DST_Y_PER_PTE_ROW_NOM_L[], ++ double DST_Y_PER_PTE_ROW_NOM_C[], ++ double DST_Y_PER_META_ROW_NOM_L[], ++ double TimePerMetaChunkNominal[], ++ double TimePerMetaChunkVBlank[], ++ double TimePerMetaChunkFlip[], ++ double time_per_pte_group_nom_luma[], ++ double time_per_pte_group_vblank_luma[], ++ double time_per_pte_group_flip_luma[], ++ double time_per_pte_group_nom_chroma[], ++ double time_per_pte_group_vblank_chroma[], ++ double time_per_pte_group_flip_chroma[], ++ double TimePerVMGroupVBlank[], ++ double TimePerVMGroupFlip[], ++ double TimePerVMRequestVBlank[], ++ double TimePerVMRequestFlip[]) ++{ ++ unsigned int meta_chunk_width; ++ unsigned int min_meta_chunk_width; ++ unsigned int meta_chunk_per_row_int; ++ unsigned int meta_row_remainder; ++ unsigned int meta_chunk_threshold; ++ unsigned int meta_chunks_per_row_ub; ++ unsigned int dpte_group_width_luma; ++ unsigned int dpte_group_width_chroma; ++ unsigned int dpte_groups_per_row_luma_ub; ++ unsigned int dpte_groups_per_row_chroma_ub; ++ unsigned int num_group_per_lower_vm_stage; ++ unsigned int num_req_per_lower_vm_stage; ++ uint k; ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (GPUVMEnable == true) { ++ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; ++ if (BytePerPixelDETC[k] == 0) { ++ DST_Y_PER_PTE_ROW_NOM_C[k] = 0; ++ } else { ++ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2); ++ } ++ } else { ++ DST_Y_PER_PTE_ROW_NOM_L[k] = 0; ++ DST_Y_PER_PTE_ROW_NOM_C[k] = 0; ++ } ++ if (DCCEnable[k] == true) { ++ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; ++ } else { ++ DST_Y_PER_META_ROW_NOM_L[k] = 
0; ++ } ++ } ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (DCCEnable[k] == true) { ++ meta_chunk_width = MetaChunkSize * 1024 * 256 ++ / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k]; ++ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 ++ / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k]; ++ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; ++ meta_row_remainder = meta_row_width[k] % meta_chunk_width; ++ if (SourceScan[k] == dm_horz) { ++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; ++ } else { ++ meta_chunk_threshold = 2 * min_meta_chunk_width ++ - meta_req_height[k]; ++ } ++ if (meta_row_remainder <= meta_chunk_threshold) { ++ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; ++ } else { ++ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; ++ } ++ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] ++ / PixelClock[k] / meta_chunks_per_row_ub; ++ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] ++ * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; ++ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] ++ * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; ++ } else { ++ TimePerMetaChunkNominal[k] = 0; ++ TimePerMetaChunkVBlank[k] = 0; ++ TimePerMetaChunkFlip[k] = 0; ++ } ++ } ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (GPUVMEnable == true) { ++ if (SourceScan[k] == dm_horz) { ++ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] ++ * PixelPTEReqWidthY[k]; ++ } else { ++ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] ++ * PixelPTEReqHeightY[k]; ++ } ++ dpte_groups_per_row_luma_ub = dml_ceil( ++ dpte_row_width_luma_ub[k] / dpte_group_width_luma, ++ 1); ++ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] ++ / PixelClock[k] / dpte_groups_per_row_luma_ub; ++ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] ++ * HTotal[k] / PixelClock[k] / 
dpte_groups_per_row_luma_ub; ++ time_per_pte_group_flip_luma[k] = ++ DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] ++ / PixelClock[k] ++ / dpte_groups_per_row_luma_ub; ++ if (BytePerPixelDETC[k] == 0) { ++ time_per_pte_group_nom_chroma[k] = 0; ++ time_per_pte_group_vblank_chroma[k] = 0; ++ time_per_pte_group_flip_chroma[k] = 0; ++ } else { ++ if (SourceScan[k] == dm_horz) { ++ dpte_group_width_chroma = dpte_group_bytes[k] ++ / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; ++ } else { ++ dpte_group_width_chroma = dpte_group_bytes[k] ++ / PTERequestSizeC[k] ++ * PixelPTEReqHeightC[k]; ++ } ++ dpte_groups_per_row_chroma_ub = dml_ceil( ++ dpte_row_width_chroma_ub[k] ++ / dpte_group_width_chroma, ++ 1); ++ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] ++ * HTotal[k] / PixelClock[k] ++ / dpte_groups_per_row_chroma_ub; ++ time_per_pte_group_vblank_chroma[k] = ++ DestinationLinesToRequestRowInVBlank[k] * HTotal[k] ++ / PixelClock[k] ++ / dpte_groups_per_row_chroma_ub; ++ time_per_pte_group_flip_chroma[k] = ++ DestinationLinesToRequestRowInImmediateFlip[k] ++ * HTotal[k] / PixelClock[k] ++ / dpte_groups_per_row_chroma_ub; ++ } ++ } else { ++ time_per_pte_group_nom_luma[k] = 0; ++ time_per_pte_group_vblank_luma[k] = 0; ++ time_per_pte_group_flip_luma[k] = 0; ++ time_per_pte_group_nom_chroma[k] = 0; ++ time_per_pte_group_vblank_chroma[k] = 0; ++ time_per_pte_group_flip_chroma[k] = 0; ++ } ++ } ++ ++ for (k = 0; k < NumberOfActivePlanes; ++k) { ++ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { ++ if (DCCEnable[k] == false) { ++ if (BytePerPixelDETC[k] > 0) { ++ num_group_per_lower_vm_stage = ++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) ++ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); ++ } else { ++ num_group_per_lower_vm_stage = ++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); ++ } ++ } else 
{ ++ if (GPUVMMaxPageTableLevels == 1) { ++ if (BytePerPixelDETC[k] > 0) { ++ num_group_per_lower_vm_stage = ++ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) ++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); ++ } else { ++ num_group_per_lower_vm_stage = ++ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); ++ } ++ } else { ++ if (BytePerPixelDETC[k] > 0) { ++ num_group_per_lower_vm_stage = ++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) ++ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) ++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) ++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); ++ } else { ++ num_group_per_lower_vm_stage = ++ dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) ++ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); ++ } ++ } ++ } ++ ++ if (DCCEnable[k] == false) { ++ if (BytePerPixelDETC[k] > 0) { ++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] ++ / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; ++ } else { ++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] ++ / 64; ++ } ++ } else { ++ if (GPUVMMaxPageTableLevels == 1) { ++ if (BytePerPixelDETC[k] > 0) { ++ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 ++ + meta_pte_bytes_per_frame_ub_c[k] / 64; ++ } else { ++ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; ++ } ++ } else { ++ if (BytePerPixelDETC[k] > 0) { ++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 ++ + dpde0_bytes_per_frame_ub_c[k] / 64 ++ + meta_pte_bytes_per_frame_ub_l[k] / 64 ++ + meta_pte_bytes_per_frame_ub_c[k] / 64; ++ } else { ++ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 ++ 
+ meta_pte_bytes_per_frame_ub_l[k] / 64; ++ } ++ } ++ } ++ ++ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] ++ / PixelClock[k] / num_group_per_lower_vm_stage; ++ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] ++ * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; ++ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] ++ * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; ++ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] ++ * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; ++ ++ if (GPUVMMaxPageTableLevels > 2) { ++ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; ++ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; ++ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; ++ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; ++ } ++ ++ } else { ++ TimePerVMGroupVBlank[k] = 0; ++ TimePerVMGroupFlip[k] = 0; ++ TimePerVMRequestVBlank[k] = 0; ++ TimePerVMRequestFlip[k] = 0; ++ } ++ } ++} ++ ++/* Returns UrgentRoundTripAndOutOfOrderLatency plus the time needed to return (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024 bytes at ReturnBW. When GPUVMEnable is set, each plane additionally contributes NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor / ReturnBW. HostVMDynamicLevels is HostVMMaxPageTableLevels - HostVMCachedPageTableLevels and HostVMInefficiencyFactor is the ratio of the two Percent... arguments when both GPUVMEnable and HostVMEnable are set; otherwise they are 0 and 1. The local accumulator shares the function's name. */static double CalculateExtraLatency( ++ double UrgentRoundTripAndOutOfOrderLatency, ++ int TotalNumberOfActiveDPP, ++ int PixelChunkSizeInKByte, ++ int TotalNumberOfDCCActiveDPP, ++ int MetaChunkSize, ++ double ReturnBW, ++ bool GPUVMEnable, ++ bool HostVMEnable, ++ int NumberOfActivePlanes, ++ int NumberOfDPP[], ++ long dpte_group_bytes[], ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, ++ double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, ++ int HostVMMaxPageTableLevels, ++ int HostVMCachedPageTableLevels) ++{ ++ double CalculateExtraLatency; ++ double HostVMInefficiencyFactor; ++ int HostVMDynamicLevels; ++ ++ if (GPUVMEnable && HostVMEnable) { ++ HostVMInefficiencyFactor = ++ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData ++ / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; ++ HostVMDynamicLevels = HostVMMaxPageTableLevels -
HostVMCachedPageTableLevels; ++ } else { ++ HostVMInefficiencyFactor = 1; ++ HostVMDynamicLevels = 0; ++ } ++ ++ CalculateExtraLatency = UrgentRoundTripAndOutOfOrderLatency ++ + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte ++ + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0 ++ / ReturnBW; ++ ++ if (GPUVMEnable) { ++ int k; ++ ++ for (k = 0; k < NumberOfActivePlanes; k++) { ++ CalculateExtraLatency = CalculateExtraLatency ++ + NumberOfDPP[k] * dpte_group_bytes[k] ++ * (1 + 8 * HostVMDynamicLevels) ++ * HostVMInefficiencyFactor / ReturnBW; ++ } ++ } ++ return CalculateExtraLatency; ++} ++ ++#endif +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h +new file mode 100644 +index 000000000000..fb9548a2f894 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h +@@ -0,0 +1,32 @@ ++/* ++ * Copyright 2017 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: AMD ++ * ++ */ ++ ++#ifndef __DML21_DISPLAY_MODE_VBA_H__ ++#define __DML21_DISPLAY_MODE_VBA_H__ ++ ++void dml21_recalculate(struct display_mode_lib *mode_lib); ++void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); ++ ++#endif /* _DML21_DISPLAY_MODE_VBA_H_ */ +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +new file mode 100644 +index 000000000000..a1f207cbb966 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +@@ -0,0 +1,1823 @@ ++/* ++ * Copyright 2017 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: AMD ++ * ++ */ ++ ++#ifdef CONFIG_DRM_AMD_DC_DCN2_0 ++ ++#include "../display_mode_lib.h" ++#include "../display_mode_vba.h" ++#include "../dml_inline_defs.h" ++#include "display_rq_dlg_calc_21.h" ++ ++/* ++ * NOTE: ++ * This file is gcc-parseable HW gospel, coming straight from HW engineers. ++ * ++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd ++ * ways. Unless there is something clearly wrong with it the code should ++ * remain as-is as it provides us with a guarantee from HW that it is correct. ++ */ ++ ++static void calculate_ttu_cursor( ++ struct display_mode_lib *mode_lib, ++ double *refcyc_per_req_delivery_pre_cur, ++ double *refcyc_per_req_delivery_cur, ++ double refclk_freq_in_mhz, ++ double ref_freq_to_pix_freq, ++ double hscale_pixel_rate_l, ++ double hscl_ratio, ++ double vratio_pre_l, ++ double vratio_l, ++ unsigned int cur_width, ++ enum cursor_bpp cur_bpp); ++ ++static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) ++{ ++ unsigned int ret_val = 0; ++ ++ if (source_format == dm_444_16) { ++ if (!is_chroma) ++ ret_val = 2; ++ } else if (source_format == dm_444_32) { ++ if (!is_chroma) ++ ret_val = 4; ++ } else if (source_format == dm_444_64) { ++ if (!is_chroma) ++ ret_val = 8; ++ } else if (source_format == dm_420_8) { ++ if (is_chroma) ++ ret_val = 2; ++ else ++ ret_val = 1; ++ } else if (source_format == dm_420_10) { ++ if (is_chroma) ++ ret_val = 4; ++ else ++ ret_val = 2; ++ } else if (source_format == dm_444_8) { ++ ret_val = 1; ++ } ++ return ret_val; ++} ++ ++static bool is_dual_plane(enum source_format_class source_format) ++{ ++ bool ret_val = 0; ++ ++ if ((source_format 
== dm_420_8) || (source_format == dm_420_10)) ++ ret_val = 1; ++ ++ return ret_val; ++} ++ ++static double get_refcyc_per_delivery( ++ struct display_mode_lib *mode_lib, ++ double refclk_freq_in_mhz, ++ double pclk_freq_in_mhz, ++ bool odm_combine, ++ unsigned int recout_width, ++ unsigned int hactive, ++ double vratio, ++ double hscale_pixel_rate, ++ unsigned int delivery_width, ++ unsigned int req_per_swath_ub) ++{ ++ double refcyc_per_delivery = 0.0; ++ ++ if (vratio <= 1.0) { ++ if (odm_combine) ++ refcyc_per_delivery = (double) refclk_freq_in_mhz ++ * dml_min((double) recout_width, (double) hactive / 2.0) ++ / pclk_freq_in_mhz / (double) req_per_swath_ub; ++ else ++ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width ++ / pclk_freq_in_mhz / (double) req_per_swath_ub; ++ } else { ++ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width ++ / (double) hscale_pixel_rate / (double) req_per_swath_ub; ++ } ++ ++ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); ++ dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); ++ dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); ++ dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); ++ ++ return refcyc_per_delivery; ++ ++} ++ ++static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) ++{ ++ if (tile_size == dm_256k_tile) ++ return (256 * 1024); ++ else if (tile_size == dm_64k_tile) ++ return (64 * 1024); ++ else ++ return (4 * 1024); ++} ++ ++static void extract_rq_sizing_regs( ++ struct display_mode_lib *mode_lib, ++ display_data_rq_regs_st *rq_regs, ++ const display_data_rq_sizing_params_st rq_sizing) ++{ ++ dml_print("DML_DLG: %s: rq_sizing param\n", __func__); ++ 
print__data_rq_sizing_params_st(mode_lib, rq_sizing); ++ ++ rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; ++ ++ if (rq_sizing.min_chunk_bytes == 0) ++ rq_regs->min_chunk_size = 0; ++ else ++ rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; ++ ++ rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; ++ if (rq_sizing.min_meta_chunk_bytes == 0) ++ rq_regs->min_meta_chunk_size = 0; ++ else ++ rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; ++ ++ rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; ++ rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; ++} ++ ++static void extract_rq_regs( ++ struct display_mode_lib *mode_lib, ++ display_rq_regs_st *rq_regs, ++ const display_rq_params_st rq_param) ++{ ++ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; ++ unsigned int detile_buf_plane1_addr = 0; ++ ++ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); ++ ++ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor( ++ dml_log2(rq_param.dlg.rq_l.dpte_row_height), ++ 1) - 3; ++ ++ if (rq_param.yuv420) { ++ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); ++ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor( ++ dml_log2(rq_param.dlg.rq_c.dpte_row_height), ++ 1) - 3; ++ } ++ ++ rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); ++ rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); ++ ++ // FIXME: take the max between luma, chroma chunk size? 
++ // okay for now, as we are setting chunk_bytes to 8kb anyways ++ if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb ++ rq_regs->drq_expansion_mode = 0; ++ } else { ++ rq_regs->drq_expansion_mode = 2; ++ } ++ rq_regs->prq_expansion_mode = 1; ++ rq_regs->mrq_expansion_mode = 1; ++ rq_regs->crq_expansion_mode = 1; ++ ++ if (rq_param.yuv420) { ++ if ((double) rq_param.misc.rq_l.stored_swath_bytes ++ / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { ++ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma ++ } else { ++ detile_buf_plane1_addr = dml_round_to_multiple( ++ (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), ++ 256, ++ 0) / 64.0; // 2/3 to chroma ++ } ++ } ++ rq_regs->plane1_base_address = detile_buf_plane1_addr; ++} ++ ++static void handle_det_buf_split( ++ struct display_mode_lib *mode_lib, ++ display_rq_params_st *rq_param, ++ const display_pipe_source_params_st pipe_src_param) ++{ ++ unsigned int total_swath_bytes = 0; ++ unsigned int swath_bytes_l = 0; ++ unsigned int swath_bytes_c = 0; ++ unsigned int full_swath_bytes_packed_l = 0; ++ unsigned int full_swath_bytes_packed_c = 0; ++ bool req128_l = 0; ++ bool req128_c = 0; ++ bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); ++ bool surf_vert = (pipe_src_param.source_scan == dm_vert); ++ unsigned int log2_swath_height_l = 0; ++ unsigned int log2_swath_height_c = 0; ++ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; ++ ++ full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; ++ full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; ++ ++ if (rq_param->yuv420_10bpc) { ++ full_swath_bytes_packed_l = dml_round_to_multiple( ++ rq_param->misc.rq_l.full_swath_bytes * 2 / 3, ++ 256, ++ 1) + 256; ++ full_swath_bytes_packed_c = dml_round_to_multiple( ++ rq_param->misc.rq_c.full_swath_bytes * 2 / 3, ++ 256, ++ 1) + 256; ++ } ++ ++ if (rq_param->yuv420) { ++ total_swath_bytes = 2 * 
full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; ++ ++ if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request ++ req128_l = 0; ++ req128_c = 0; ++ swath_bytes_l = full_swath_bytes_packed_l; ++ swath_bytes_c = full_swath_bytes_packed_c; ++ } else { //128b request (for luma only for yuv420 8bpc) ++ req128_l = 1; ++ req128_c = 0; ++ swath_bytes_l = full_swath_bytes_packed_l / 2; ++ swath_bytes_c = full_swath_bytes_packed_c; ++ } ++ // Note: assumption, the config that pass in will fit into ++ // the detiled buffer. ++ } else { ++ total_swath_bytes = 2 * full_swath_bytes_packed_l; ++ ++ if (total_swath_bytes <= detile_buf_size_in_bytes) ++ req128_l = 0; ++ else ++ req128_l = 1; ++ ++ swath_bytes_l = total_swath_bytes; ++ swath_bytes_c = 0; ++ } ++ rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; ++ rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; ++ ++ if (surf_linear) { ++ log2_swath_height_l = 0; ++ log2_swath_height_c = 0; ++ } else if (!surf_vert) { ++ log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; ++ log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; ++ } else { ++ log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; ++ log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; ++ } ++ rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; ++ rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; ++ ++ dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); ++ dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); ++ dml_print( ++ "DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", ++ __func__, ++ full_swath_bytes_packed_l); ++ dml_print( ++ "DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", ++ __func__, ++ full_swath_bytes_packed_c); ++} ++ ++static void get_meta_and_pte_attr( ++ struct display_mode_lib *mode_lib, ++ display_data_rq_dlg_params_st *rq_dlg_param, ++ display_data_rq_misc_params_st 
*rq_misc_param, ++ display_data_rq_sizing_params_st *rq_sizing_param, ++ unsigned int vp_width, ++ unsigned int vp_height, ++ unsigned int data_pitch, ++ unsigned int meta_pitch, ++ unsigned int source_format, ++ unsigned int tiling, ++ unsigned int macro_tile_size, ++ unsigned int source_scan, ++ unsigned int hostvm_enable, ++ unsigned int is_chroma) ++{ ++ bool surf_linear = (tiling == dm_sw_linear); ++ bool surf_vert = (source_scan == dm_vert); ++ ++ unsigned int bytes_per_element; ++ unsigned int bytes_per_element_y = get_bytes_per_element( ++ (enum source_format_class) (source_format), ++ false); ++ unsigned int bytes_per_element_c = get_bytes_per_element( ++ (enum source_format_class) (source_format), ++ true); ++ ++ unsigned int blk256_width = 0; ++ unsigned int blk256_height = 0; ++ ++ unsigned int blk256_width_y = 0; ++ unsigned int blk256_height_y = 0; ++ unsigned int blk256_width_c = 0; ++ unsigned int blk256_height_c = 0; ++ unsigned int log2_bytes_per_element; ++ unsigned int log2_blk256_width; ++ unsigned int log2_blk256_height; ++ unsigned int blk_bytes; ++ unsigned int log2_blk_bytes; ++ unsigned int log2_blk_height; ++ unsigned int log2_blk_width; ++ unsigned int log2_meta_req_bytes; ++ unsigned int log2_meta_req_height; ++ unsigned int log2_meta_req_width; ++ unsigned int meta_req_width; ++ unsigned int meta_req_height; ++ unsigned int log2_meta_row_height; ++ unsigned int meta_row_width_ub; ++ unsigned int log2_meta_chunk_bytes; ++ unsigned int log2_meta_chunk_height; ++ ++ //full sized meta chunk width in unit of data elements ++ unsigned int log2_meta_chunk_width; ++ unsigned int log2_min_meta_chunk_bytes; ++ unsigned int min_meta_chunk_width; ++ unsigned int meta_chunk_width; ++ unsigned int meta_chunk_per_row_int; ++ unsigned int meta_row_remainder; ++ unsigned int meta_chunk_threshold; ++ unsigned int meta_blk_bytes; ++ unsigned int meta_blk_height; ++ unsigned int meta_blk_width; ++ unsigned int meta_surface_bytes; ++ unsigned int 
vmpg_bytes; ++ unsigned int meta_pte_req_per_frame_ub; ++ unsigned int meta_pte_bytes_per_frame_ub; ++ const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); ++ const unsigned int dpte_buf_in_pte_reqs = ++ mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; ++ const unsigned int pde_proc_buffer_size_64k_reqs = ++ mode_lib->ip.pde_proc_buffer_size_64k_reqs; ++ ++ unsigned int log2_vmpg_height = 0; ++ unsigned int log2_vmpg_width = 0; ++ unsigned int log2_dpte_req_height_ptes = 0; ++ unsigned int log2_dpte_req_height = 0; ++ unsigned int log2_dpte_req_width = 0; ++ unsigned int log2_dpte_row_height_linear = 0; ++ unsigned int log2_dpte_row_height = 0; ++ unsigned int log2_dpte_group_width = 0; ++ unsigned int dpte_row_width_ub = 0; ++ unsigned int dpte_req_height = 0; ++ unsigned int dpte_req_width = 0; ++ unsigned int dpte_group_width = 0; ++ unsigned int log2_dpte_group_bytes = 0; ++ unsigned int log2_dpte_group_length = 0; ++ unsigned int pde_buf_entries; ++ bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); ++ ++ Calculate256BBlockSizes( ++ (enum source_format_class) (source_format), ++ (enum dm_swizzle_mode) (tiling), ++ bytes_per_element_y, ++ bytes_per_element_c, ++ &blk256_height_y, ++ &blk256_height_c, ++ &blk256_width_y, ++ &blk256_width_c); ++ ++ if (!is_chroma) { ++ blk256_width = blk256_width_y; ++ blk256_height = blk256_height_y; ++ bytes_per_element = bytes_per_element_y; ++ } else { ++ blk256_width = blk256_width_c; ++ blk256_height = blk256_height_c; ++ bytes_per_element = bytes_per_element_c; ++ } ++ ++ log2_bytes_per_element = dml_log2(bytes_per_element); ++ ++ dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); ++ dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); ++ dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); ++ dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); ++ ++ 
log2_blk256_width = dml_log2((double) blk256_width); ++ log2_blk256_height = dml_log2((double) blk256_height); ++ blk_bytes = surf_linear ? ++ 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); ++ log2_blk_bytes = dml_log2((double) blk_bytes); ++ log2_blk_height = 0; ++ log2_blk_width = 0; ++ ++ // remember log rule ++ // "+" in log is multiply ++ // "-" in log is divide ++ // "/2" is like square root ++ // blk is vertical biased ++ if (tiling != dm_sw_linear) ++ log2_blk_height = log2_blk256_height ++ + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); ++ else ++ log2_blk_height = 0; // blk height of 1 ++ ++ log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; ++ ++ if (!surf_vert) { ++ rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) ++ + blk256_width; ++ rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; ++ } else { ++ rq_dlg_param->swath_width_ub = dml_round_to_multiple( ++ vp_height - 1, ++ blk256_height, ++ 1) + blk256_height; ++ rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; ++ } ++ ++ if (!surf_vert) ++ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height ++ * bytes_per_element; ++ else ++ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width ++ * bytes_per_element; ++ ++ rq_misc_param->blk256_height = blk256_height; ++ rq_misc_param->blk256_width = blk256_width; ++ ++ // ------- ++ // meta ++ // ------- ++ log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element ++ ++ // each 64b meta request for dcn is 8x8 meta elements and ++ // a meta element covers one 256b block of the the data surface. 
++ log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 ++ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element ++ - log2_meta_req_height; ++ meta_req_width = 1 << log2_meta_req_width; ++ meta_req_height = 1 << log2_meta_req_height; ++ log2_meta_row_height = 0; ++ meta_row_width_ub = 0; ++ ++ // the dimensions of a meta row are meta_row_width x meta_row_height in elements. ++ // calculate upper bound of the meta_row_width ++ if (!surf_vert) { ++ log2_meta_row_height = log2_meta_req_height; ++ meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) ++ + meta_req_width; ++ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; ++ } else { ++ log2_meta_row_height = log2_meta_req_width; ++ meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) ++ + meta_req_height; ++ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; ++ } ++ rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; ++ ++ rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; ++ ++ log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); ++ log2_meta_chunk_height = log2_meta_row_height; ++ ++ //full sized meta chunk width in unit of data elements ++ log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element ++ - log2_meta_chunk_height; ++ log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); ++ min_meta_chunk_width = 1 ++ << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element ++ - log2_meta_chunk_height); ++ meta_chunk_width = 1 << log2_meta_chunk_width; ++ meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); ++ meta_row_remainder = meta_row_width_ub % meta_chunk_width; ++ meta_chunk_threshold = 0; ++ meta_blk_bytes = 4096; ++ meta_blk_height = blk256_height * 64; ++ meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; ++ 
meta_surface_bytes = meta_pitch ++ * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) ++ + meta_blk_height) * bytes_per_element / 256; ++ vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; ++ meta_pte_req_per_frame_ub = (dml_round_to_multiple( ++ meta_surface_bytes - vmpg_bytes, ++ 8 * vmpg_bytes, ++ 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); ++ meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request ++ rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; ++ ++ dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); ++ dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); ++ dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); ++ dml_print( ++ "DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", ++ __func__, ++ meta_pte_req_per_frame_ub); ++ dml_print( ++ "DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", ++ __func__, ++ meta_pte_bytes_per_frame_ub); ++ ++ if (!surf_vert) ++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; ++ else ++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; ++ ++ if (meta_row_remainder <= meta_chunk_threshold) ++ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; ++ else ++ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; ++ ++ // ------ ++ // dpte ++ // ------ ++ if (surf_linear) { ++ log2_vmpg_height = 0; // one line high ++ } else { ++ log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; ++ } ++ log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; ++ ++ // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. 
++ if (surf_linear) { //one 64B PTE request returns 8 PTEs ++ log2_dpte_req_height_ptes = 0; ++ log2_dpte_req_width = log2_vmpg_width + 3; ++ log2_dpte_req_height = 0; ++ } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size ++ //one 64B req gives 8x1 PTEs for 4KB tile ++ log2_dpte_req_height_ptes = 0; ++ log2_dpte_req_width = log2_blk_width + 3; ++ log2_dpte_req_height = log2_blk_height + 0; ++ } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB ++ //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB ++ log2_dpte_req_height_ptes = 4; ++ log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width ++ log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height ++ } else { //64KB page size and must 64KB tile block ++ //one 64B req gives 8x1 PTEs for 64KB tile ++ log2_dpte_req_height_ptes = 0; ++ log2_dpte_req_width = log2_blk_width + 3; ++ log2_dpte_req_height = log2_blk_height + 0; ++ } ++ ++ // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height ++ // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent ++ // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) ++ //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; ++ //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; ++ dpte_req_height = 1 << log2_dpte_req_height; ++ dpte_req_width = 1 << log2_dpte_req_width; ++ ++ // calculate pitch dpte row buffer can hold ++ // round the result down to a power of two. ++ pde_buf_entries = ++ yuv420 ? 
(pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; ++ if (surf_linear) { ++ unsigned int dpte_row_height; ++ ++ log2_dpte_row_height_linear = dml_floor( ++ dml_log2( ++ dml_min( ++ 64 * 1024 * pde_buf_entries ++ / bytes_per_element, ++ dpte_buf_in_pte_reqs ++ * dpte_req_width) ++ / data_pitch), ++ 1); ++ ++ ASSERT(log2_dpte_row_height_linear >= 3); ++ ++ if (log2_dpte_row_height_linear > 7) ++ log2_dpte_row_height_linear = 7; ++ ++ log2_dpte_row_height = log2_dpte_row_height_linear; ++ // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. ++ // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. ++ dpte_row_height = 1 << log2_dpte_row_height; ++ dpte_row_width_ub = dml_round_to_multiple( ++ data_pitch * dpte_row_height - 1, ++ dpte_req_width, ++ 1) + dpte_req_width; ++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; ++ } else { ++ // the upper bound of the dpte_row_width without dependency on viewport position follows. ++ // for tiled mode, row height is the same as req height and row store up to vp size upper bound ++ if (!surf_vert) { ++ log2_dpte_row_height = log2_dpte_req_height; ++ dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) ++ + dpte_req_width; ++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; ++ } else { ++ log2_dpte_row_height = ++ (log2_blk_width < log2_dpte_req_width) ? 
++ log2_blk_width : log2_dpte_req_width; ++ dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) ++ + dpte_req_height; ++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; ++ } ++ } ++ if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB ++ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request ++ else ++ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request ++ ++ rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; ++ ++ // the dpte_group_bytes is reduced for the specific case of vertical ++ // access of a tile surface that has dpte request of 8x1 ptes. ++ ++ if (hostvm_enable) ++ rq_sizing_param->dpte_group_bytes = 512; ++ else { ++ if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group ++ rq_sizing_param->dpte_group_bytes = 512; ++ else ++ //full size ++ rq_sizing_param->dpte_group_bytes = 2048; ++ } ++ ++ //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. ++ log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); ++ log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests ++ ++ // full sized data pte group width in elements ++ if (!surf_vert) ++ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; ++ else ++ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; ++ ++ //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B ++ if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB ++ log2_dpte_group_width = log2_dpte_group_width - 1; ++ ++ dpte_group_width = 1 << log2_dpte_group_width; ++ ++ // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, ++ // the upper bound for the dpte groups per row is as follows. 
++ rq_dlg_param->dpte_groups_per_row_ub = dml_ceil( ++ (double) dpte_row_width_ub / dpte_group_width, ++ 1); ++} ++ ++static void get_surf_rq_param( ++ struct display_mode_lib *mode_lib, ++ display_data_rq_sizing_params_st *rq_sizing_param, ++ display_data_rq_dlg_params_st *rq_dlg_param, ++ display_data_rq_misc_params_st *rq_misc_param, ++ const display_pipe_params_st pipe_param, ++ bool is_chroma) ++{ ++ bool mode_422 = 0; ++ unsigned int vp_width = 0; ++ unsigned int vp_height = 0; ++ unsigned int data_pitch = 0; ++ unsigned int meta_pitch = 0; ++ unsigned int ppe = mode_422 ? 2 : 1; ++ ++ // FIXME check if ppe apply for both luma and chroma in 422 case ++ if (is_chroma) { ++ vp_width = pipe_param.src.viewport_width_c / ppe; ++ vp_height = pipe_param.src.viewport_height_c; ++ data_pitch = pipe_param.src.data_pitch_c; ++ meta_pitch = pipe_param.src.meta_pitch_c; ++ } else { ++ vp_width = pipe_param.src.viewport_width / ppe; ++ vp_height = pipe_param.src.viewport_height; ++ data_pitch = pipe_param.src.data_pitch; ++ meta_pitch = pipe_param.src.meta_pitch; ++ } ++ ++ if (pipe_param.dest.odm_combine) { ++ unsigned int access_dir; ++ unsigned int full_src_vp_width; ++ unsigned int hactive_half; ++ unsigned int src_hactive_half; ++ access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed ++ hactive_half = pipe_param.dest.hactive / 2; ++ if (is_chroma) { ++ full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width; ++ src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_half; ++ } else { ++ full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width; ++ src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio * hactive_half; ++ } ++ ++ if (access_dir == 0) { ++ vp_width = dml_min(full_src_vp_width, src_hactive_half); ++ dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width); ++ } else { ++ vp_height = 
dml_min(full_src_vp_width, src_hactive_half); ++ dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height); ++ ++ } ++ dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width); ++ dml_print("DML_DLG: %s: hactive_half = %d\n", __func__, hactive_half); ++ dml_print("DML_DLG: %s: src_hactive_half = %d\n", __func__, src_hactive_half); ++ } ++ rq_sizing_param->chunk_bytes = 8192; ++ ++ if (rq_sizing_param->chunk_bytes == 64 * 1024) ++ rq_sizing_param->min_chunk_bytes = 0; ++ else ++ rq_sizing_param->min_chunk_bytes = 1024; ++ ++ rq_sizing_param->meta_chunk_bytes = 2048; ++ rq_sizing_param->min_meta_chunk_bytes = 256; ++ ++ if (pipe_param.src.hostvm) ++ rq_sizing_param->mpte_group_bytes = 512; ++ else ++ rq_sizing_param->mpte_group_bytes = 2048; ++ ++ get_meta_and_pte_attr( ++ mode_lib, ++ rq_dlg_param, ++ rq_misc_param, ++ rq_sizing_param, ++ vp_width, ++ vp_height, ++ data_pitch, ++ meta_pitch, ++ pipe_param.src.source_format, ++ pipe_param.src.sw_mode, ++ pipe_param.src.macro_tile_size, ++ pipe_param.src.source_scan, ++ pipe_param.src.hostvm, ++ is_chroma); ++} ++ ++static void dml_rq_dlg_get_rq_params( ++ struct display_mode_lib *mode_lib, ++ display_rq_params_st *rq_param, ++ const display_pipe_params_st pipe_param) ++{ ++ // get param for luma surface ++ rq_param->yuv420 = pipe_param.src.source_format == dm_420_8 ++ || pipe_param.src.source_format == dm_420_10; ++ rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10; ++ ++ get_surf_rq_param( ++ mode_lib, ++ &(rq_param->sizing.rq_l), ++ &(rq_param->dlg.rq_l), ++ &(rq_param->misc.rq_l), ++ pipe_param, ++ 0); ++ ++ if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format))) { ++ // get param for chroma surface ++ get_surf_rq_param( ++ mode_lib, ++ &(rq_param->sizing.rq_c), ++ &(rq_param->dlg.rq_c), ++ &(rq_param->misc.rq_c), ++ pipe_param, ++ 1); ++ } ++ ++ // calculate how to split the det buffer space between luma and chroma ++ 
handle_det_buf_split(mode_lib, rq_param, pipe_param.src); ++ print__rq_params_st(mode_lib, *rq_param); ++} ++ ++void dml21_rq_dlg_get_rq_reg( ++ struct display_mode_lib *mode_lib, ++ display_rq_regs_st *rq_regs, ++ const display_pipe_params_st pipe_param) ++{ ++ display_rq_params_st rq_param = {0}; ++ ++ memset(rq_regs, 0, sizeof(*rq_regs)); ++ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param); ++ extract_rq_regs(mode_lib, rq_regs, rq_param); ++ ++ print__rq_regs_st(mode_lib, *rq_regs); ++} ++ ++// Note: currently taken in as is. ++// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. ++static void dml_rq_dlg_get_dlg_params( ++ struct display_mode_lib *mode_lib, ++ const display_e2e_pipe_params_st *e2e_pipe_param, ++ const unsigned int num_pipes, ++ const unsigned int pipe_idx, ++ display_dlg_regs_st *disp_dlg_regs, ++ display_ttu_regs_st *disp_ttu_regs, ++ const display_rq_dlg_params_st rq_dlg_param, ++ const display_dlg_sys_params_st dlg_sys_param, ++ const bool cstate_en, ++ const bool pstate_en) ++{ ++ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; ++ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; ++ const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; ++ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; ++ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; ++ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; ++ ++ // ------------------------- ++ // Section 1.15.2.1: OTG dependent Params ++ // ------------------------- ++ // Timing ++ unsigned int htotal = dst->htotal; ++ // unsigned int hblank_start = dst.hblank_start; // TODO: Remove ++ unsigned int hblank_end = dst->hblank_end; ++ unsigned int vblank_start = dst->vblank_start; ++ unsigned int vblank_end = dst->vblank_end; ++ unsigned int min_vblank = mode_lib->ip.min_vblank_lines; ++ ++ 
double dppclk_freq_in_mhz = clks->dppclk_mhz; ++ double dispclk_freq_in_mhz = clks->dispclk_mhz; ++ double refclk_freq_in_mhz = clks->refclk_mhz; ++ double pclk_freq_in_mhz = dst->pixel_rate_mhz; ++ bool interlaced = dst->interlaced; ++ ++ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; ++ ++ double min_dcfclk_mhz; ++ double t_calc_us; ++ double min_ttu_vblank; ++ ++ double min_dst_y_ttu_vblank; ++ unsigned int dlg_vblank_start; ++ bool dual_plane; ++ bool mode_422; ++ unsigned int access_dir; ++ unsigned int vp_height_l; ++ unsigned int vp_width_l; ++ unsigned int vp_height_c; ++ unsigned int vp_width_c; ++ ++ // Scaling ++ unsigned int htaps_l; ++ unsigned int htaps_c; ++ double hratio_l; ++ double hratio_c; ++ double vratio_l; ++ double vratio_c; ++ bool scl_enable; ++ ++ double line_time_in_us; ++ // double vinit_l; ++ // double vinit_c; ++ // double vinit_bot_l; ++ // double vinit_bot_c; ++ ++ // unsigned int swath_height_l; ++ unsigned int swath_width_ub_l; ++ // unsigned int dpte_bytes_per_row_ub_l; ++ unsigned int dpte_groups_per_row_ub_l; ++ // unsigned int meta_pte_bytes_per_frame_ub_l; ++ // unsigned int meta_bytes_per_row_ub_l; ++ ++ // unsigned int swath_height_c; ++ unsigned int swath_width_ub_c; ++ // unsigned int dpte_bytes_per_row_ub_c; ++ unsigned int dpte_groups_per_row_ub_c; ++ ++ unsigned int meta_chunks_per_row_ub_l; ++ unsigned int meta_chunks_per_row_ub_c; ++ unsigned int vupdate_offset; ++ unsigned int vupdate_width; ++ unsigned int vready_offset; ++ ++ unsigned int dppclk_delay_subtotal; ++ unsigned int dispclk_delay_subtotal; ++ unsigned int pixel_rate_delay_subtotal; ++ ++ unsigned int vstartup_start; ++ unsigned int dst_x_after_scaler; ++ unsigned int dst_y_after_scaler; ++ double line_wait; ++ double dst_y_prefetch; ++ double dst_y_per_vm_vblank; ++ double dst_y_per_row_vblank; ++ double dst_y_per_vm_flip; ++ double dst_y_per_row_flip; ++ double max_dst_y_per_vm_vblank; ++ double max_dst_y_per_row_vblank; ++ double
lsw; ++ double vratio_pre_l; ++ double vratio_pre_c; ++ unsigned int req_per_swath_ub_l; ++ unsigned int req_per_swath_ub_c; ++ unsigned int meta_row_height_l; ++ unsigned int meta_row_height_c; ++ unsigned int swath_width_pixels_ub_l; ++ unsigned int swath_width_pixels_ub_c; ++ unsigned int scaler_rec_in_width_l; ++ unsigned int scaler_rec_in_width_c; ++ unsigned int dpte_row_height_l; ++ unsigned int dpte_row_height_c; ++ double hscale_pixel_rate_l; ++ double hscale_pixel_rate_c; ++ double min_hratio_fact_l; ++ double min_hratio_fact_c; ++ double refcyc_per_line_delivery_pre_l; ++ double refcyc_per_line_delivery_pre_c; ++ double refcyc_per_line_delivery_l; ++ double refcyc_per_line_delivery_c; ++ ++ double refcyc_per_req_delivery_pre_l; ++ double refcyc_per_req_delivery_pre_c; ++ double refcyc_per_req_delivery_l; ++ double refcyc_per_req_delivery_c; ++ ++ unsigned int full_recout_width; ++ double xfc_transfer_delay; ++ double xfc_precharge_delay; ++ double xfc_remote_surface_flip_latency; ++ double xfc_dst_y_delta_drq_limit; ++ double xfc_prefetch_margin; ++ double refcyc_per_req_delivery_pre_cur0; ++ double refcyc_per_req_delivery_cur0; ++ double refcyc_per_req_delivery_pre_cur1; ++ double refcyc_per_req_delivery_cur1; ++ ++ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); ++ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); ++ ++ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); ++ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); ++ ++ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); ++ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ++ ASSERT(ref_freq_to_pix_freq < 4.0); ++/* The register values below are stored as scaled integers: the double result is multiplied by a power of two (2^19, 2^8 or 2^2 in the statements that follow) and then truncated by the cast to unsigned int. */ ++ disp_dlg_regs->ref_freq_to_pix_freq =
++ (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); ++ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal ++ * dml_pow(2, 8)); ++ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits ++ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end ++ * (double) ref_freq_to_pix_freq); ++ ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13)); ++ ++ min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; ++ t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); ++ min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; ++ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; ++ ++ disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); ++ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); ++ ++ dml_print( ++ "DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", ++ __func__, ++ min_dcfclk_mhz); ++ dml_print( ++ "DML_DLG: %s: min_ttu_vblank = %3.2f\n", ++ __func__, ++ min_ttu_vblank); ++ dml_print( ++ "DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", ++ __func__, ++ min_dst_y_ttu_vblank); ++ dml_print( ++ "DML_DLG: %s: t_calc_us = %3.2f\n", ++ __func__, ++ t_calc_us); ++ dml_print( ++ "DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", ++ __func__, ++ disp_dlg_regs->min_dst_y_next_start); ++ dml_print( ++ "DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", ++ __func__, ++ ref_freq_to_pix_freq); ++ ++ // ------------------------- ++ // Section 1.15.2.2: Prefetch, Active and TTU ++ // ------------------------- ++ // Prefetch Calc ++ // Source ++ // dcc_en = src.dcc; ++ dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); ++ mode_422 = 0; // FIXME ++ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed ++ //
bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); ++ // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); ++ vp_height_l = src->viewport_height; ++ vp_width_l = src->viewport_width; ++ vp_height_c = src->viewport_height_c; ++ vp_width_c = src->viewport_width_c; ++ ++ // Scaling ++ htaps_l = taps->htaps; ++ htaps_c = taps->htaps_c; ++ hratio_l = scl->hscl_ratio; ++ hratio_c = scl->hscl_ratio_c; ++ vratio_l = scl->vscl_ratio; ++ vratio_c = scl->vscl_ratio_c; ++ scl_enable = scl->scl_enable; ++ ++ line_time_in_us = (htotal / pclk_freq_in_mhz); ++ swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; ++ dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; ++ swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; ++ dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; ++ ++ meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; ++ meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; ++ vupdate_offset = dst->vupdate_offset; ++ vupdate_width = dst->vupdate_width; ++ vready_offset = dst->vready_offset; ++ ++ dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; ++ dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; ++ ++ if (scl_enable) ++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; ++ else ++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; ++ ++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter ++ + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; ++ ++ if (dout->dsc_enable) { ++ double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ dispclk_delay_subtotal += dsc_delay; ++ } ++ ++ pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz ++ + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; ++ ++ vstartup_start = dst->vstartup_start; ++ if (interlaced) { ++ if (vstartup_start / 2.0 ++ -
(double) (vready_offset + vupdate_width + vupdate_offset) / htotal ++ <= vblank_end / 2.0) ++ disp_dlg_regs->vready_after_vcount0 = 1; ++ else ++ disp_dlg_regs->vready_after_vcount0 = 0; ++ } else { ++ if (vstartup_start ++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal ++ <= vblank_end) ++ disp_dlg_regs->vready_after_vcount0 = 1; ++ else ++ disp_dlg_regs->vready_after_vcount0 = 0; ++ } ++ ++ // TODO: Where is this coming from? ++ if (interlaced) ++ vstartup_start = vstartup_start / 2; ++/* NOTE(review): in the block below the same "should be less than" warning is printed twice, once before and once after min_vblank is raised to vstartup_start + 1; the second print therefore shows the adjusted value under an unchanged message. */ ++ // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? ++ if (vstartup_start >= min_vblank) { ++ dml_print( ++ "WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", ++ __func__, ++ vblank_start, ++ vblank_end); ++ dml_print( ++ "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", ++ __func__, ++ vstartup_start, ++ min_vblank); ++ min_vblank = vstartup_start + 1; ++ dml_print( ++ "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", ++ __func__, ++ vstartup_start, ++ min_vblank); ++ } ++ ++ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); ++ dml_print( ++ "DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", ++ __func__, ++ pixel_rate_delay_subtotal); ++ dml_print( ++ "DML_DLG: %s: dst_x_after_scaler = %d\n", ++ __func__, ++ dst_x_after_scaler); ++ dml_print( ++ "DML_DLG: %s: dst_y_after_scaler = %d\n", ++ __func__, ++ dst_y_after_scaler); ++/* NOTE(review): line_wait is computed in the statements below (largest applicable latency divided by the line time) but is not read again anywhere in this function. */ ++ // Lwait ++ // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
++ line_wait = mode_lib->soc.urgent_latency_pixel_data_only_us; ++ if (cstate_en) ++ line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); ++ if (pstate_en) ++ line_wait = dml_max( ++ mode_lib->soc.dram_clock_change_latency_us ++ + mode_lib->soc.urgent_latency_pixel_data_only_us, // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us? ++ line_wait); ++ line_wait = line_wait / line_time_in_us; ++ ++ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); ++ ++ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ dst_y_per_row_vblank = get_dst_y_per_row_vblank( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ max_dst_y_per_vm_vblank = 32.0; ++ max_dst_y_per_row_vblank = 16.0; ++ ++ // magic!
++ if (htotal <= 75) { ++ min_vblank = 300; ++ max_dst_y_per_vm_vblank = 100.0; ++ max_dst_y_per_row_vblank = 100.0; ++ } ++ ++ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); ++ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); ++ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); ++ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); ++ ++ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); ++ ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); ++ ++ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); ++ lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); ++ ++ dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); ++ ++ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ ++ dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); ++ dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); ++ ++ // Active ++ req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; ++ req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; ++ meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; ++ meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; ++ swath_width_pixels_ub_l = 0; ++ swath_width_pixels_ub_c = 0; ++ scaler_rec_in_width_l = 0; ++ scaler_rec_in_width_c = 0; ++ dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; ++ dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; ++ ++ if (mode_422) { ++ swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element ++ swath_width_pixels_ub_c = swath_width_ub_c * 2; ++ } else { ++ swath_width_pixels_ub_l = swath_width_ub_l * 1; ++ swath_width_pixels_ub_c = swath_width_ub_c * 1; ++ } ++ ++ hscale_pixel_rate_l = 0.; ++ hscale_pixel_rate_c = 0.; ++ min_hratio_fact_l =
1.0; ++ min_hratio_fact_c = 1.0; ++ ++ if (htaps_l <= 1) ++ min_hratio_fact_l = 2.0; ++ else if (htaps_l <= 6) { ++ if ((hratio_l * 2.0) > 4.0) ++ min_hratio_fact_l = 4.0; ++ else ++ min_hratio_fact_l = hratio_l * 2.0; ++ } else { ++ if (hratio_l > 4.0) ++ min_hratio_fact_l = 4.0; ++ else ++ min_hratio_fact_l = hratio_l; ++ } ++ ++ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; ++ ++ if (htaps_c <= 1) ++ min_hratio_fact_c = 2.0; ++ else if (htaps_c <= 6) { ++ if ((hratio_c * 2.0) > 4.0) ++ min_hratio_fact_c = 4.0; ++ else ++ min_hratio_fact_c = hratio_c * 2.0; ++ } else { ++ if (hratio_c > 4.0) ++ min_hratio_fact_c = 4.0; ++ else ++ min_hratio_fact_c = hratio_c; ++ } ++ ++ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; ++ ++ refcyc_per_line_delivery_pre_l = 0.; ++ refcyc_per_line_delivery_pre_c = 0.; ++ refcyc_per_line_delivery_l = 0.; ++ refcyc_per_line_delivery_c = 0.; ++ ++ refcyc_per_req_delivery_pre_l = 0.; ++ refcyc_per_req_delivery_pre_c = 0.; ++ refcyc_per_req_delivery_l = 0.; ++ refcyc_per_req_delivery_c = 0.; ++ ++ full_recout_width = 0; ++ // In ODM ++ if (src->is_hsplit) { ++ // This "hack" is only allowed (and valid) for MPC combine.
In ODM ++ // combine, you MUST specify the full_recout_width...according to Oswin ++ if (dst->full_recout_width == 0 && !dst->odm_combine) { ++ dml_print( ++ "DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", ++ __func__); ++ full_recout_width = dst->recout_width * 2; // assume half split for dcn1 ++ } else ++ full_recout_width = dst->full_recout_width; ++ } else ++ full_recout_width = dst->recout_width; ++ ++ // As of DCN2, mpc_combine and odm_combine are mutually exclusive ++ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_l, ++ hscale_pixel_rate_l, ++ swath_width_pixels_ub_l, ++ 1); // per line ++ ++ refcyc_per_line_delivery_l = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_l, ++ hscale_pixel_rate_l, ++ swath_width_pixels_ub_l, ++ 1); // per line ++ ++ dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width); ++ dml_print( ++ "DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", ++ __func__, ++ hscale_pixel_rate_l); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_pre_l); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_l); ++ ++ if (dual_plane) { ++ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_c, ++ hscale_pixel_rate_c, ++ swath_width_pixels_ub_c, ++ 1); // per line ++ ++ refcyc_per_line_delivery_c = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_c, ++ hscale_pixel_rate_c, ++ swath_width_pixels_ub_c, ++ 1); // per
line ++ ++ dml_print( ++ "DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_pre_c); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", ++ __func__, ++ refcyc_per_line_delivery_c); ++ } ++ ++ // TTU - Luma / Chroma ++ if (access_dir) { // vertical access ++ scaler_rec_in_width_l = vp_height_l; ++ scaler_rec_in_width_c = vp_height_c; ++ } else { ++ scaler_rec_in_width_l = vp_width_l; ++ scaler_rec_in_width_c = vp_width_c; ++ } ++ ++ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_l, ++ hscale_pixel_rate_l, ++ scaler_rec_in_width_l, ++ req_per_swath_ub_l); // per req ++ refcyc_per_req_delivery_l = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_l, ++ hscale_pixel_rate_l, ++ scaler_rec_in_width_l, ++ req_per_swath_ub_l); // per req ++ ++ dml_print( ++ "DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_pre_l); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_l); ++ ++ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ++ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); ++ ++ if (dual_plane) { ++ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_pre_c, ++ hscale_pixel_rate_c, ++ scaler_rec_in_width_c, ++ req_per_swath_ub_c); // per req ++ refcyc_per_req_delivery_c = get_refcyc_per_delivery( ++ mode_lib, ++ refclk_freq_in_mhz, ++ pclk_freq_in_mhz, ++ dst->odm_combine, ++ full_recout_width, ++ dst->hactive, ++ vratio_c, ++ hscale_pixel_rate_c, ++ scaler_rec_in_width_c, ++ req_per_swath_ub_c); // per req ++ ++ dml_print( ++
"DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_pre_c); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", ++ __func__, ++ refcyc_per_req_delivery_c); ++ ++ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ++ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); ++ } ++ ++ // XFC ++ xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); ++ xfc_precharge_delay = get_xfc_precharge_delay( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; ++ xfc_prefetch_margin = get_xfc_prefetch_margin( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx); ++ ++ // TTU - Cursor ++ refcyc_per_req_delivery_pre_cur0 = 0.0; ++ refcyc_per_req_delivery_cur0 = 0.0; ++ if (src->num_cursors > 0) { ++ calculate_ttu_cursor( ++ mode_lib, ++ &refcyc_per_req_delivery_pre_cur0, ++ &refcyc_per_req_delivery_cur0, ++ refclk_freq_in_mhz, ++ ref_freq_to_pix_freq, ++ hscale_pixel_rate_l, ++ scl->hscl_ratio, ++ vratio_pre_l, ++ vratio_l, ++ src->cur0_src_width, ++ (enum cursor_bpp) (src->cur0_bpp)); ++ } ++ ++ refcyc_per_req_delivery_pre_cur1 = 0.0; ++ refcyc_per_req_delivery_cur1 = 0.0; ++ if (src->num_cursors > 1) { ++ calculate_ttu_cursor( ++ mode_lib, ++ &refcyc_per_req_delivery_pre_cur1, ++ &refcyc_per_req_delivery_cur1, ++ refclk_freq_in_mhz, ++ ref_freq_to_pix_freq, ++ hscale_pixel_rate_l, ++ scl->hscl_ratio, ++ vratio_pre_l, ++ vratio_l, ++ src->cur1_src_width, ++ (enum cursor_bpp) (src->cur1_bpp)); ++ } ++ ++ // TTU - Misc ++ // all hard-coded ++ ++ // Assignment to register structures ++ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line ++ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk ++
ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13)); ++ disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); ++ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); ++ ++ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); ++ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); ++ ++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); ++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); ++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); ++ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); ++ ++ disp_dlg_regs->refcyc_per_pte_group_vblank_l = ++ (unsigned int) (dst_y_per_row_vblank * (double) htotal ++ * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); ++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); ++ ++ if (dual_plane) { ++ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank ++ * (double) htotal * ref_freq_to_pix_freq ++ / (double) dpte_groups_per_row_ub_c); ++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c ++ < (unsigned int)dml_pow(2, 13)); ++ } ++ ++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = ++ (unsigned int) (dst_y_per_row_vblank * (double) htotal ++ * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); ++ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2,
13)); ++ ++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = ++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now ++/* NOTE(review): unlike the vblank variants above, which divide in double before the cast, the four flip variants below cast the product to unsigned int first and then divide, so the division is an unsigned integer division by dpte_groups_per_row_ub_l/_c or meta_chunks_per_row_ub_l/_c. A zero divisor is not checked in this function; confirm the producer of rq_dlg_param guarantees non-zero values. */ ++ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal ++ * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; ++ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal ++ * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; ++ ++ if (dual_plane) { ++ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip ++ * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; ++ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip ++ * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; ++ } ++/* The four VM values below are helper results multiplied by refclk_freq_in_mhz and are then clamped to 2^23 - 1. NOTE(review): the refcyc_per_vm_req_vblank and refcyc_per_vm_req_flip statements end in a doubled semicolon, which is a harmless empty statement. */ ++ disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; ++ disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; ++ disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;; ++ disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;; ++ ++ // Clamp to max for now ++ if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1; ++ ++ if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1; ++ ++ if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1; ++ ++ if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_vm_req_flip =
dml_pow(2, 23) - 1; ++ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l ++ / (double) vratio_l * dml_pow(2, 2)); ++ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17)); ++ ++ if (dual_plane) { ++ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c ++ / (double) vratio_c * dml_pow(2, 2)); ++ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { ++ dml_print( ++ "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", ++ __func__, ++ disp_dlg_regs->dst_y_per_pte_row_nom_c, ++ (unsigned int)dml_pow(2, 17) - 1); ++ } ++ } ++ ++ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l ++ / (double) vratio_l * dml_pow(2, 2)); ++ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17)); ++ ++ disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0.
assigned to be the same as _l for now ++ ++ dml_print( ++ "DML: Trow: %fus\n", ++ line_time_in_us * (double)dpte_row_height_l / (double)vratio_l); ++ ++ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l ++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq ++ / (double) dpte_groups_per_row_ub_l); ++ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; ++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l ++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq ++ / (double) meta_chunks_per_row_ub_l); ++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; ++ ++ if (dual_plane) { ++ disp_dlg_regs->refcyc_per_pte_group_nom_c = ++ (unsigned int) ((double) dpte_row_height_c / (double) vratio_c ++ * (double) htotal * ref_freq_to_pix_freq ++ / (double) dpte_groups_per_row_ub_c); ++ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; ++ ++ // TODO: Is this the right calculation? Does htotal need to be halved?
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = ++ (unsigned int) ((double) meta_row_height_c / (double) vratio_c ++ * (double) htotal * ref_freq_to_pix_freq ++ / (double) meta_chunks_per_row_ub_c); ++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) ++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; ++ } ++ ++ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor( ++ refcyc_per_line_delivery_pre_l, 1); ++ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor( ++ refcyc_per_line_delivery_l, 1); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13)); ++ ++ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor( ++ refcyc_per_line_delivery_pre_c, 1); ++ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor( ++ refcyc_per_line_delivery_c, 1); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); ++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13)); ++ ++ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; ++ disp_dlg_regs->dst_y_offset_cur0 = 0; ++ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; ++ disp_dlg_regs->dst_y_offset_cur1 = 0; ++ ++ disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; ++ disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; ++ disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; ++ disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil( ++ xfc_prefetch_margin * refclk_freq_in_mhz, 1); ++ ++ // slave has to have this value also set to off ++ if (src->xfc_enable && !src->xfc_slave) ++ disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); ++ else ++ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off ++ ++ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l
++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = ++ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 ++ * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = ++ (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); ++ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 ++ * dml_pow(2, 10)); ++ disp_ttu_regs->qos_level_low_wm = 0; ++ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); ++ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal ++ * ref_freq_to_pix_freq); ++ ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14)); ++ ++ disp_ttu_regs->qos_level_flip = 14; ++ disp_ttu_regs->qos_level_fixed_l = 8; ++ disp_ttu_regs->qos_level_fixed_c = 8; ++ disp_ttu_regs->qos_level_fixed_cur0 = 8; ++ disp_ttu_regs->qos_ramp_disable_l = 0; ++ disp_ttu_regs->qos_ramp_disable_c = 0; ++ disp_ttu_regs->qos_ramp_disable_cur0 = 0; ++ ++ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; ++ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); ++ ++ print__ttu_regs_st(mode_lib, *disp_ttu_regs); ++ print__dlg_regs_st(mode_lib, *disp_dlg_regs); ++} ++/* dml21_rq_dlg_get_dlg_reg: collects the system-level values (urgent watermark, deep-sleep DCFCLK, extra urgent latency, memory trip, DRAM clock change and stutter watermarks, immediate-flip bandwidth and bytes) through the get_* helpers, computes the request parameters of e2e_pipe_param[pipe_idx].pipe, and then calls dml_rq_dlg_get_dlg_params() to fill *dlg_regs and *ttu_regs. NOTE(review): vm_en, ignore_viewport_pos and immediate_flip_support are accepted but not referenced in the body. */ ++void dml21_rq_dlg_get_dlg_reg( ++ struct display_mode_lib *mode_lib, ++ display_dlg_regs_st *dlg_regs, ++ display_ttu_regs_st *ttu_regs, ++ display_e2e_pipe_params_st *e2e_pipe_param, ++ const unsigned int num_pipes, ++ const unsigned int pipe_idx, ++ const bool cstate_en, ++ const bool pstate_en, ++ const bool vm_en, ++ const bool
ignore_viewport_pos, ++ const bool immediate_flip_support) ++{ ++ display_rq_params_st rq_param = {0}; ++ display_dlg_sys_params_st dlg_sys_param = {0}; ++ ++ // Get watermark and Tex. ++ dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes); ++ dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); ++ dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes); ++ dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes); ++ dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency ++ / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated ++ ++ print__dlg_sys_params_st(mode_lib, dlg_sys_param); ++ ++ // system parameter calculation done ++ ++ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); ++ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe); ++ dml_rq_dlg_get_dlg_params( ++ mode_lib, ++ e2e_pipe_param, ++ num_pipes, ++ pipe_idx, ++ dlg_regs, ++ ttu_regs, ++ rq_param.dlg, ++ dlg_sys_param, ++ cstate_en, ++ pstate_en); ++ dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); ++} ++/* dml_rq_dlg_get_arb_params: zeroes *arb_param and then writes three fixed values: max_req_outstanding = 256, min_req_outstanding = 68 and sat_level_us = 60. mode_lib is not used by the body. */ ++void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param) ++{ ++ memset(arb_param, 0, sizeof(*arb_param)); ++ arb_param->max_req_outstanding = 256; ++ arb_param->min_req_outstanding = 68; ++ arb_param->sat_level_us = 60; ++} ++ ++static void calculate_ttu_cursor( ++ struct display_mode_lib *mode_lib, ++ double *refcyc_per_req_delivery_pre_cur,
++ double *refcyc_per_req_delivery_cur, ++ double refclk_freq_in_mhz, ++ double ref_freq_to_pix_freq, ++ double hscale_pixel_rate_l, ++ double hscl_ratio, ++ double vratio_pre_l, ++ double vratio_l, ++ unsigned int cur_width, ++ enum cursor_bpp cur_bpp) ++{ ++ unsigned int cur_src_width = cur_width; ++ unsigned int cur_req_size = 0; ++ unsigned int cur_req_width = 0; ++ double cur_width_ub = 0.0; ++ double cur_req_per_width = 0.0; ++ double hactive_cur = 0.0; ++ ++ ASSERT(cur_src_width <= 256); ++ ++ *refcyc_per_req_delivery_pre_cur = 0.0; ++ *refcyc_per_req_delivery_cur = 0.0; ++ if (cur_src_width > 0) { ++ unsigned int cur_bit_per_pixel = 0; ++ ++ if (cur_bpp == dm_cur_2bit) { ++ cur_req_size = 64; // byte ++ cur_bit_per_pixel = 2; ++ } else { // 32bit ++ cur_bit_per_pixel = 32; ++ if (cur_src_width >= 1 && cur_src_width <= 16) ++ cur_req_size = 64; ++ else if (cur_src_width >= 17 && cur_src_width <= 31) ++ cur_req_size = 128; ++ else ++ cur_req_size = 256; ++ } ++ ++ cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); ++ cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) ++ * (double) cur_req_width; ++ cur_req_per_width = cur_width_ub / (double) cur_req_width; ++ hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor ++ ++ if (vratio_pre_l <= 1.0) { ++ *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq ++ / (double) cur_req_per_width; ++ } else { ++ *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz ++ * (double) cur_src_width / hscale_pixel_rate_l ++ / (double) cur_req_per_width; ++ } ++ ++ ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); ++ ++ if (vratio_l <= 1.0) { ++ *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq ++ / (double) cur_req_per_width; ++ } else { ++ *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz ++ * (double) cur_src_width / hscale_pixel_rate_l ++ / (double) cur_req_per_width; ++ 
} ++ ++ dml_print( ++ "DML_DLG: %s: cur_req_width = %d\n", ++ __func__, ++ cur_req_width); ++ dml_print( ++ "DML_DLG: %s: cur_width_ub = %3.2f\n", ++ __func__, ++ cur_width_ub); ++ dml_print( ++ "DML_DLG: %s: cur_req_per_width = %3.2f\n", ++ __func__, ++ cur_req_per_width); ++ dml_print( ++ "DML_DLG: %s: hactive_cur = %3.2f\n", ++ __func__, ++ hactive_cur); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", ++ __func__, ++ *refcyc_per_req_delivery_pre_cur); ++ dml_print( ++ "DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", ++ __func__, ++ *refcyc_per_req_delivery_cur); ++ ++ ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); ++ } ++} ++ ++#endif +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h +new file mode 100644 +index 000000000000..83e95f8cbff2 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h +@@ -0,0 +1,73 @@ ++/* ++ * Copyright 2017 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: AMD ++ * ++ */ ++ ++#ifndef __DML21_DISPLAY_RQ_DLG_CALC_H__ ++#define __DML21_DISPLAY_RQ_DLG_CALC_H__ ++ ++#include "../dml_common_defs.h" ++#include "../display_rq_dlg_helpers.h" ++ ++struct display_mode_lib; ++ ++ ++// Function: dml21_rq_dlg_get_rq_reg ++// Main entry point for test to get the register values out of this DML class. ++// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate ++// and then populate the rq_regs struct ++// Input: ++// pipe_param - pipe source configuration (e.g. vp, pitch, etc.) ++// Output: ++// rq_regs - struct that holds all the RQ register field values. ++// See also: <display_rq_regs_st> ++void dml21_rq_dlg_get_rq_reg( ++ struct display_mode_lib *mode_lib, ++ display_rq_regs_st *rq_regs, ++ const display_pipe_params_st pipe_param); ++ ++// Function: dml21_rq_dlg_get_dlg_reg ++// Calculate and return DLG and TTU register struct given the system setting ++// Output: ++// dlg_regs - output DLG register struct ++// ttu_regs - output DLG TTU register struct ++// Input: ++// e2e_pipe_param - "compacted" array of e2e pipe param struct ++// num_pipes - num of active "pipe" or "route" ++// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg ++// cstate_en - 0: when calculating min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. ++// Added for legacy or unrealistic timing tests.
++void dml21_rq_dlg_get_dlg_reg( ++ struct display_mode_lib *mode_lib, ++ display_dlg_regs_st *dlg_regs, ++ display_ttu_regs_st *ttu_regs, ++ display_e2e_pipe_params_st *e2e_pipe_param, ++ const unsigned int num_pipes, ++ const unsigned int pipe_idx, ++ const bool cstate_en, ++ const bool pstate_en, ++ const bool vm_en, ++ const bool ignore_viewport_pos, ++ const bool immediate_flip_support); ++ ++#endif +diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +index 870716e3c132..d8c59aa356b6 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h ++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +@@ -38,6 +38,9 @@ enum dml_project { + DML_PROJECT_NAVI10, + DML_PROJECT_NAVI10v2, + #endif ++#ifdef CONFIG_DRM_AMD_DC_DCN2_1 ++ DML_PROJECT_DCN21, ++#endif + }; + + struct display_mode_lib; +-- +2.17.1 + |