diff options
Diffstat (limited to 'common/recipes-kernel/linux/files/0713-drm-amdgpu-Fixup-hw-vblank-counter-ts-for-new-drm_up.patch')
-rw-r--r-- | common/recipes-kernel/linux/files/0713-drm-amdgpu-Fixup-hw-vblank-counter-ts-for-new-drm_up.patch | 444 |
1 files changed, 0 insertions, 444 deletions
diff --git a/common/recipes-kernel/linux/files/0713-drm-amdgpu-Fixup-hw-vblank-counter-ts-for-new-drm_up.patch b/common/recipes-kernel/linux/files/0713-drm-amdgpu-Fixup-hw-vblank-counter-ts-for-new-drm_up.patch deleted file mode 100644 index d9694527..00000000 --- a/common/recipes-kernel/linux/files/0713-drm-amdgpu-Fixup-hw-vblank-counter-ts-for-new-drm_up.patch +++ /dev/null @@ -1,444 +0,0 @@ -From 8e36f9d33c134d5c6448ad65b423a9fd94e045cf Mon Sep 17 00:00:00 2001 -From: Alex Deucher <alexander.deucher@amd.com> -Date: Thu, 3 Dec 2015 12:31:56 -0500 -Subject: [PATCH 0713/1565] drm/amdgpu: Fixup hw vblank counter/ts for new - drm_update_vblank_count() (v3) - -commit 4dfd6486 "drm: Use vblank timestamps to guesstimate how many -vblanks were missed" introduced in Linux 4.4-rc1 makes the drm core -more fragile to drivers which don't update hw vblank counters and -vblank timestamps in sync with firing of the vblank irq and -essentially at leading edge of vblank. - -This exposed a problem with radeon-kms/amdgpu-kms which do not -satisfy above requirements: - -The vblank irq fires a few scanlines before start of vblank, but -programmed pageflips complete at start of vblank and -vblank timestamps update at start of vblank, whereas the -hw vblank counter increments only later, at start of vsync. - -This leads to problems like off by one errors for vblank counter -updates, vblank counters apparently going backwards or vblank -timestamps apparently having time going backwards. The net result -is stuttering of graphics in games, or little hangs, as well as -total failure of timing sensitive applications. - -See bug #93147 for an example of the regression on Linux 4.4-rc: - -https://bugs.freedesktop.org/show_bug.cgi?id=93147 - -This patch tries to align all above events better from the -viewpoint of the drm core / of external callers to fix the problem: - -1. The apparent start of vblank is shifted a few scanlines earlier, -so the vblank irq now always happens after start of this extended -vblank interval and thereby drm_update_vblank_count() always samples -the updated vblank count and timestamp of the new vblank interval. - -To achieve this, the reporting of scanout positions by -radeon_get_crtc_scanoutpos() now operates as if the vblank starts -radeon_crtc->lb_vblank_lead_lines before the real start of the hw -vblank interval. This means that the vblank timestamps which are based -on these scanout positions will now update at this earlier start of -vblank. - -2. The driver->get_vblank_counter() function will bump the returned -vblank count as read from the hw by +1 if the query happens after -the shifted earlier start of the vblank, but before the real hw increment -at start of vsync, so the counter appears to increment at start of vblank -in sync with the timestamp update. - -3. Calls from vblank irq-context and regular non-irq calls are now -treated identical, always simulating the shifted vblank start, to -avoid inconsistent results for queries happening from vblank irq vs. -happening from drm_vblank_enable() or vblank_disable_fn(). - -4. The radeon_flip_work_func will delay mmio programming a pageflip until -the start of the real vblank iff it happens to execute inside the shifted -earlier start of the vblank, so pageflips now also appear to execute at -start of the shifted vblank, in sync with vblank counter and timestamp -updates. This to avoid some races between updates of vblank count and -timestamps that are used for swap scheduling and pageflip execution which -could cause pageflips to execute before the scheduled target vblank. - -The lb_vblank_lead_lines "fudge" value is calculated as the size of -the display controllers line buffer in scanlines for the given video -mode: Vblank irq's are triggered by the line buffer logic when the line -buffer refill for a video frame ends, ie. when the line buffer source read -position enters the hw vblank. This means that a vblank irq could fire at -most as many scanlines before the current reported scanout position of the -crtc timing generator as the number of scanlines the line buffer can -maximally hold for a given video mode. - -This patch has been successfully tested on a RV730 card with DCE-3 display -engine and on a evergreen card with DCE-4 display engine, in single-display -and dual-display configuration, with different video modes. - -A similar patch is needed for amdgpu-kms to fix the same problem. - -Limitations: - -- Maybe replace the udelay() in the flip_work_func() by a suitable - usleep_range() for a bit better efficiency? Will try that. - -- Line buffer sizes in pixels are hard-coded on < DCE-4 to a value - i just guessed to be high enough to work ok, lacking info on the true - sizes atm. - -Probably fixes: fdo#93147 - -Port of Mario's radeon fix to amdgpu. - -Signed-off-by: Alex Deucher <alexander.deucher@amd.com> -(v1) Reviewed-by: Mario Kleiner <mario.kleiner.de@gmail.com> - -(v2) Refine amdgpu_flip_work_func() for better efficiency. - - In amdgpu_flip_work_func, replace the busy waiting udelay(5) - with event lock held by a more performance and energy efficient - usleep_range() until at least predicted true start of hw vblank, - with some slack for scheduler happiness. Release the event lock - during waits to not delay other outputs in doing their stuff, as - the waiting can last up to 200 usecs in some cases. - - Also small fix to code comment and formatting in that function. - -(v2) Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> - -(v3) Fix crash in crtc disabled case ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 102 +++++++++++++++++++++------- - drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 48 ++++++++++++- - drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 5 ++ - drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 5 +- - drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 5 +- - drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 5 +- - 6 files changed, 140 insertions(+), 30 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c -index ddd7233..5580d34 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c -@@ -73,6 +73,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work) - struct drm_crtc *crtc = &amdgpuCrtc->base; - unsigned long flags; - unsigned i; -+ int vpos, hpos, stat, min_udelay; -+ struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; - - amdgpu_flip_wait_fence(adev, &work->excl); - for (i = 0; i < work->shared_count; ++i) -@@ -81,6 +83,41 @@ static void amdgpu_flip_work_func(struct work_struct *__work) - /* We borrow the event spin lock for protecting flip_status */ - spin_lock_irqsave(&crtc->dev->event_lock, flags); - -+ /* If this happens to execute within the "virtually extended" vblank -+ * interval before the start of the real vblank interval then it needs -+ * to delay programming the mmio flip until the real vblank is entered. -+ * This prevents completing a flip too early due to the way we fudge -+ * our vblank counter and vblank timestamps in order to work around the -+ * problem that the hw fires vblank interrupts before actual start of -+ * vblank (when line buffer refilling is done for a frame). It -+ * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for -+ * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts. -+ * -+ * In practice this won't execute very often unless on very fast -+ * machines because the time window for this to happen is very small. -+ */ -+ for (;;) { -+ /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank -+ * start in hpos, and to the "fudged earlier" vblank start in -+ * vpos. -+ */ -+ stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, -+ GET_DISTANCE_TO_VBLANKSTART, -+ &vpos, &hpos, NULL, NULL, -+ &crtc->hwmode); -+ -+ if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != -+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) || -+ !(vpos >= 0 && hpos <= 0)) -+ break; -+ -+ /* Sleep at least until estimated real start of hw vblank */ -+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags); -+ min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); -+ usleep_range(min_udelay, 2 * min_udelay); -+ spin_lock_irqsave(&crtc->dev->event_lock, flags); -+ }; -+ - /* do the flip (mmio) */ - adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); - /* set the flip status */ -@@ -712,6 +749,15 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - * \param dev Device to query. - * \param pipe Crtc to query. - * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0). -+ * For driver internal use only also supports these flags: -+ * -+ * USE_REAL_VBLANKSTART to use the real start of vblank instead -+ * of a fudged earlier start of vblank. -+ * -+ * GET_DISTANCE_TO_VBLANKSTART to return distance to the -+ * fudged earlier start of vblank in *vpos and the distance -+ * to true start of vblank in *hpos. -+ * - * \param *vpos Location where vertical scanout position should be stored. - * \param *hpos Location where horizontal scanout position should go. - * \param *stime Target location for timestamp taken immediately before -@@ -776,10 +822,40 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - vbl_end = 0; - } - -+ /* Called from driver internal vblank counter query code? */ -+ if (flags & GET_DISTANCE_TO_VBLANKSTART) { -+ /* Caller wants distance from real vbl_start in *hpos */ -+ *hpos = *vpos - vbl_start; -+ } -+ -+ /* Fudge vblank to start a few scanlines earlier to handle the -+ * problem that vblank irqs fire a few scanlines before start -+ * of vblank. Some driver internal callers need the true vblank -+ * start to be used and signal this via the USE_REAL_VBLANKSTART flag. -+ * -+ * The cause of the "early" vblank irq is that the irq is triggered -+ * by the line buffer logic when the line buffer read position enters -+ * the vblank, whereas our crtc scanout position naturally lags the -+ * line buffer read position. -+ */ -+ if (!(flags & USE_REAL_VBLANKSTART)) -+ vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines; -+ - /* Test scanout position against vblank region. */ - if ((*vpos < vbl_start) && (*vpos >= vbl_end)) - in_vbl = false; - -+ /* In vblank? */ -+ if (in_vbl) -+ ret |= DRM_SCANOUTPOS_IN_VBLANK; -+ -+ /* Called from driver internal vblank counter query code? */ -+ if (flags & GET_DISTANCE_TO_VBLANKSTART) { -+ /* Caller wants distance from fudged earlier vbl_start */ -+ *vpos -= vbl_start; -+ return ret; -+ } -+ - /* Check if inside vblank area and apply corrective offsets: - * vpos will then be >=0 in video scanout area, but negative - * within vblank area, counting down the number of lines until -@@ -795,32 +871,6 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - /* Correct for shifted end of vbl at vbl_end. */ - *vpos = *vpos - vbl_end; - -- /* In vblank? */ -- if (in_vbl) -- ret |= DRM_SCANOUTPOS_IN_VBLANK; -- -- /* Is vpos outside nominal vblank area, but less than -- * 1/100 of a frame height away from start of vblank? -- * If so, assume this isn't a massively delayed vblank -- * interrupt, but a vblank interrupt that fired a few -- * microseconds before true start of vblank. Compensate -- * by adding a full frame duration to the final timestamp. -- * Happens, e.g., on ATI R500, R600. -- * -- * We only do this if DRM_CALLED_FROM_VBLIRQ. -- */ -- if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) { -- vbl_start = mode->crtc_vdisplay; -- vtotal = mode->crtc_vtotal; -- -- if (vbl_start - *vpos < vtotal / 100) { -- *vpos -= vtotal; -- -- /* Signal this correction as "applied". */ -- ret |= 0x8; -- } -- } -- - return ret; - } - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c -index 1618e22..e23843f 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c -@@ -611,13 +611,59 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev, - u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) - { - struct amdgpu_device *adev = dev->dev_private; -+ int vpos, hpos, stat; -+ u32 count; - - if (pipe >= adev->mode_info.num_crtc) { - DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; - } - -- return amdgpu_display_vblank_get_counter(adev, pipe); -+ /* The hw increments its frame counter at start of vsync, not at start -+ * of vblank, as is required by DRM core vblank counter handling. -+ * Cook the hw count here to make it appear to the caller as if it -+ * incremented at start of vblank. We measure distance to start of -+ * vblank in vpos. vpos therefore will be >= 0 between start of vblank -+ * and start of vsync, so vpos >= 0 means to bump the hw frame counter -+ * result by 1 to give the proper appearance to caller. -+ */ -+ if (adev->mode_info.crtcs[pipe]) { -+ /* Repeat readout if needed to provide stable result if -+ * we cross start of vsync during the queries. -+ */ -+ do { -+ count = amdgpu_display_vblank_get_counter(adev, pipe); -+ /* Ask amdgpu_get_crtc_scanoutpos to return vpos as -+ * distance to start of vblank, instead of regular -+ * vertical scanout pos. -+ */ -+ stat = amdgpu_get_crtc_scanoutpos( -+ dev, pipe, GET_DISTANCE_TO_VBLANKSTART, -+ &vpos, &hpos, NULL, NULL, -+ &adev->mode_info.crtcs[pipe]->base.hwmode); -+ } while (count != amdgpu_display_vblank_get_counter(adev, pipe)); -+ -+ if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != -+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) { -+ DRM_DEBUG_VBL("Query failed! stat %d\n", stat); -+ } else { -+ DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n", -+ pipe, vpos); -+ -+ /* Bump counter if we are at >= leading edge of vblank, -+ * but before vsync where vpos would turn negative and -+ * the hw counter really increments. -+ */ -+ if (vpos >= 0) -+ count++; -+ } -+ } else { -+ /* Fallback to use value as is. */ -+ count = amdgpu_display_vblank_get_counter(adev, pipe); -+ DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n"); -+ } -+ -+ return count; - } - - /** -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -index b62c171..064ebb3 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -@@ -407,6 +407,7 @@ struct amdgpu_crtc { - u32 line_time; - u32 wm_low; - u32 wm_high; -+ u32 lb_vblank_lead_lines; - struct drm_display_mode hw_mode; - }; - -@@ -528,6 +529,10 @@ struct amdgpu_framebuffer { - #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ - ((em) == ATOM_ENCODER_MODE_DP_MST)) - -+/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ -+#define USE_REAL_VBLANKSTART (1 << 30) -+#define GET_DISTANCE_TO_VBLANKSTART (1 << 31) -+ - void amdgpu_link_encoder_connector(struct drm_device *dev); - - struct drm_connector * -diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c -index cb0f774..4dcc8fb 100644 ---- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c -+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c -@@ -1250,7 +1250,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, - u32 pixel_period; - u32 line_time = 0; - u32 latency_watermark_a = 0, latency_watermark_b = 0; -- u32 tmp, wm_mask; -+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0; - - if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; -@@ -1333,6 +1333,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } -+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); - } - - /* select wm A */ -@@ -1357,6 +1358,8 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, - amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; -+ /* Save number of lines the linebuffer leads before the scanout */ -+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; - } - - /** -diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c -index 5af3721..8f1e511 100644 ---- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c -+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c -@@ -1238,7 +1238,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, - u32 pixel_period; - u32 line_time = 0; - u32 latency_watermark_a = 0, latency_watermark_b = 0; -- u32 tmp, wm_mask; -+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0; - - if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; -@@ -1321,6 +1321,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } -+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); - } - - /* select wm A */ -@@ -1345,6 +1346,8 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, - amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; -+ /* Save number of lines the linebuffer leads before the scanout */ -+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; - } - - /** -diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c -index 4f7b49a..42d954d 100644 ---- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c -+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c -@@ -1193,7 +1193,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, - u32 pixel_period; - u32 line_time = 0; - u32 latency_watermark_a = 0, latency_watermark_b = 0; -- u32 tmp, wm_mask; -+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0; - - if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; -@@ -1276,6 +1276,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } -+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); - } - - /* select wm A */ -@@ -1302,6 +1303,8 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, - amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; -+ /* Save number of lines the linebuffer leads before the scanout */ -+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; - } - - /** --- -1.9.1 - |