Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3624-drm-amdgpu-cleanups-for-vram-lost-handling.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3624-drm-amdgpu-cleanups-for-vram-lost-handling.patch  270
1 file changed, 270 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3624-drm-amdgpu-cleanups-for-vram-lost-handling.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3624-drm-amdgpu-cleanups-for-vram-lost-handling.patch
new file mode 100644
index 00000000..a0dcfbce
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3624-drm-amdgpu-cleanups-for-vram-lost-handling.patch
@@ -0,0 +1,270 @@
+From 14b62f16474e181a70e786caee45908cd6a45fdf Mon Sep 17 00:00:00 2001
+From: Monk Liu <Monk.Liu@amd.com>
+Date: Mon, 25 Dec 2017 11:59:27 +0800
+Subject: [PATCH 3624/4131] drm/amdgpu: cleanups for vram lost handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+1) create a routine "handle_vram_lost" to do the vram
+recovery, and put it into amdgpu_device_reset/reset_sriov;
+this way there is no need for an extra parameter to hold the
+VRAM LOST information, and the related macros can be removed.
+
+2) report vram_recover failure if it times out, and set the TMO
+equal to lockup_timeout when vram_recover runs under SRIOV runtime mode.
+
+3) report an error if any IP reset fails for SR-IOV
+
+Change-Id: I686e2b6133844c14948c206a2315c064a78c1d9c
+Signed-off-by: Monk Liu <Monk.Liu@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 -
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 138 +++++++++++++++--------------
+ 2 files changed, 72 insertions(+), 70 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 37e43fd..d784389 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -183,10 +183,6 @@ extern int amdgpu_cik_support;
+ #define CIK_CURSOR_WIDTH 128
+ #define CIK_CURSOR_HEIGHT 128
+
+-/* GPU RESET flags */
+-#define AMDGPU_RESET_INFO_VRAM_LOST (1 << 0)
+-#define AMDGPU_RESET_INFO_FULLRESET (1 << 1)
+-
+ struct amdgpu_device;
+ struct amdgpu_ib;
+ struct amdgpu_cs_parser;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 5a83045..84adc73 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -1620,6 +1620,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
+
+ r = block->version->funcs->hw_init(adev);
+ DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
++ if (r)
++ return r;
+ }
+ }
+
+@@ -1653,6 +1655,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
+
+ r = block->version->funcs->hw_init(adev);
+ DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
++ if (r)
++ return r;
+ }
+ }
+
+@@ -2502,17 +2506,71 @@ static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
+ return r;
+ }
+
++static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
++{
++ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
++ struct amdgpu_bo *bo, *tmp;
++ struct dma_fence *fence = NULL, *next = NULL;
++ long r = 1;
++ int i = 0;
++ long tmo;
++
++ if (amdgpu_sriov_runtime(adev))
++ tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
++ else
++ tmo = msecs_to_jiffies(100);
++
++ DRM_INFO("recover vram bo from shadow start\n");
++ mutex_lock(&adev->shadow_list_lock);
++ list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
++ next = NULL;
++ amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
++ if (fence) {
++ r = dma_fence_wait_timeout(fence, false, tmo);
++ if (r == 0)
++ pr_err("wait fence %p[%d] timeout\n", fence, i);
++ else if (r < 0)
++ pr_err("wait fence %p[%d] interrupted\n", fence, i);
++ if (r < 1) {
++ dma_fence_put(fence);
++ fence = next;
++ break;
++ }
++ i++;
++ }
++
++ dma_fence_put(fence);
++ fence = next;
++ }
++ mutex_unlock(&adev->shadow_list_lock);
++
++ if (fence) {
++ r = dma_fence_wait_timeout(fence, false, tmo);
++ if (r == 0)
++ pr_err("wait fence %p[%d] timeout\n", fence, i);
++ else if (r < 0)
++ pr_err("wait fence %p[%d] interrupted\n", fence, i);
++
++ }
++ dma_fence_put(fence);
++
++ if (r > 0)
++ DRM_INFO("recover vram bo from shadow done\n");
++ else
++ DRM_ERROR("recover vram bo from shadow failed\n");
++
++ return (r > 0?0:1);
++}
++
+ /*
+ * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
+ *
+ * @adev: amdgpu device pointer
+- * @reset_flags: output param tells caller the reset result
+ *
+ * attempt to do soft-reset or full-reset and reinitialize Asic
+ * return 0 means successed otherwise failed
+ */
+-static int amdgpu_device_reset(struct amdgpu_device *adev,
+- uint64_t* reset_flags)
++static int amdgpu_device_reset(struct amdgpu_device *adev)
+ {
+ bool need_full_reset, vram_lost = 0;
+ int r;
+@@ -2527,7 +2585,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
+ DRM_INFO("soft reset failed, will fallback to full reset!\n");
+ need_full_reset = true;
+ }
+-
+ }
+
+ if (need_full_reset) {
+@@ -2576,13 +2633,8 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
+ }
+ }
+
+- if (reset_flags) {
+- if (vram_lost)
+- (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
+-
+- if (need_full_reset)
+- (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
+- }
++ if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
++ r = amdgpu_device_handle_vram_lost(adev);
+
+ return r;
+ }
+@@ -2591,14 +2643,11 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
+ * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
+ *
+ * @adev: amdgpu device pointer
+- * @reset_flags: output param tells caller the reset result
+ *
+ * do VF FLR and reinitialize Asic
+ * return 0 means successed otherwise failed
+ */
+-static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
+- uint64_t *reset_flags,
+- bool from_hypervisor)
++static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor)
+ {
+ int r;
+
+@@ -2619,27 +2668,18 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
+
+ /* now we are okay to resume SMC/CP/SDMA */
+ r = amdgpu_device_ip_reinit_late_sriov(adev);
++ amdgpu_virt_release_full_gpu(adev, true);
+ if (r)
+ goto error;
+
+ amdgpu_irq_gpu_reset_resume_helper(adev);
+ r = amdgpu_ib_ring_tests(adev);
+- if (r)
+- dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
++ if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
++ atomic_inc(&adev->vram_lost_counter);
++ r = amdgpu_device_handle_vram_lost(adev);
++ }
+
+ error:
+- /* release full control of GPU after ib test */
+- amdgpu_virt_release_full_gpu(adev, true);
+-
+- if (reset_flags) {
+- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+- (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
+- atomic_inc(&adev->vram_lost_counter);
+- }
+-
+- /* VF FLR or hotlink reset is always full-reset */
+- (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
+- }
+
+ return r;
+ }
+@@ -2658,7 +2698,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job, bool force)
+ {
+ struct drm_atomic_state *state = NULL;
+- uint64_t reset_flags = 0;
+ int i, r, resched;
+
+ if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
+@@ -2706,42 +2745,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ }
+
+ if (amdgpu_sriov_vf(adev))
+- r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true);
+- else
+- r = amdgpu_device_reset(adev, &reset_flags);
+-
+- if (!r) {
+- if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
+- (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
+- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+- struct amdgpu_bo *bo, *tmp;
+- struct dma_fence *fence = NULL, *next = NULL;
+-
+- DRM_INFO("recover vram bo from shadow\n");
+- mutex_lock(&adev->shadow_list_lock);
+- list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+- next = NULL;
+- amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+- if (fence) {
+- r = dma_fence_wait(fence, false);
+- if (r) {
+- WARN(r, "recovery from shadow isn't completed\n");
+- break;
+- }
+- }
+-
+- dma_fence_put(fence);
+- fence = next;
+- }
+- mutex_unlock(&adev->shadow_list_lock);
+- if (fence) {
+- r = dma_fence_wait(fence, false);
+- if (r)
+- WARN(r, "recovery from shadow isn't completed\n");
+- }
+- dma_fence_put(fence);
+- }
+- }
++ r = amdgpu_device_reset_sriov(adev, job ? false : true);
++ else
++ r = amdgpu_device_reset(adev);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+--
+2.7.4
+
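
As a reading aid (not part of the patch itself), the net effect of the change can be summarized in a short sketch; all names below are taken from the patch above, and the bodies are abridged rather than complete kernel code:

    /* Both reset paths now decide locally whether VRAM contents were lost
     * and run the shadow-BO recovery themselves, instead of handing
     * AMDGPU_RESET_INFO_* flags back to amdgpu_device_gpu_recover().
     */

    /* bare-metal / passthrough path (amdgpu_device_reset) */
    if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
            r = amdgpu_device_handle_vram_lost(adev);

    /* SR-IOV path (amdgpu_device_reset_sriov): a VF FLR is always a full
     * reset, and VRAM loss is signalled by the host through the
     * AMDGIM_FEATURE_GIM_FLR_VRAMLOST feature bit.
     */
    if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
            atomic_inc(&adev->vram_lost_counter);
            r = amdgpu_device_handle_vram_lost(adev);
    }

    /* amdgpu_device_handle_vram_lost() waits for each shadow-buffer copy
     * fence with a timeout: lockup_timeout under SR-IOV runtime mode,
     * 100 ms otherwise, and logs an error if recovery times out.
     */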