From 66e789104fc2c3b9683c1128ca4f369766ce3e70 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 21 Sep 2017 20:01:45 -0400
Subject: [PATCH 2106/4131] drm/amdgpu: Save KFD VM fault info outside of
 prink_ratelimit

The printk_ratelimit is meant to avoid flooding the log. But it should
not prevent saving of fault info for KFD. This fixes intermittent bugs
when running VM fault tests with --gtest_repeat.

Change-Id: Id077aa78d08ab533b812a78f9fa124ecb1d73c05
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 39 +++++++++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 39 +++++++++++++++++++++--------------
 2 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 322edfe..0cc140a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -755,7 +755,6 @@ static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev,
 				     u32 status, u32 addr, u32 mc_client)
 {
 	u32 mc_id;
-	struct kfd_vm_fault_info *info = adev->mc.vm_fault_info;
 	u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
 	u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 					PROTECTIONS);
@@ -770,19 +769,6 @@ static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev,
 	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			     MEMORY_CLIENT_RW) ?
 	       "write" : "read", block, mc_client, mc_id);
-
-	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
-		&& !atomic_read(&adev->mc.vm_fault_info_updated)) {
-		info->vmid = vmid;
-		info->mc_id = mc_id;
-		info->page_addr = addr;
-		info->prot_valid = protections & 0x7 ? true : false;
-		info->prot_read = protections & 0x8 ? true : false;
-		info->prot_write = protections & 0x10 ? true : false;
-		info->prot_exec = protections & 0x20 ? true : false;
-		mb();
-		atomic_set(&adev->mc.vm_fault_info_updated, 1);
-	}
 }
 
 
@@ -1261,7 +1247,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
-	u32 addr, status, mc_client;
+	u32 addr, status, mc_client, vmid;
 
 	addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
 	status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
@@ -1285,6 +1271,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
 		gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client);
 	}
 
+	vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+			     VMID);
+	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+		&& !atomic_read(&adev->mc.vm_fault_info_updated)) {
+		struct kfd_vm_fault_info *info = adev->mc.vm_fault_info;
+		u32 protections = REG_GET_FIELD(status,
+					VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+					PROTECTIONS);
+
+		info->vmid = vmid;
+		info->mc_id = REG_GET_FIELD(status,
+					    VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+					    MEMORY_CLIENT_ID);
+		info->status = status;
+		info->page_addr = addr;
+		info->prot_valid = protections & 0x7 ? true : false;
+		info->prot_read = protections & 0x8 ? true : false;
+		info->prot_write = protections & 0x10 ? true : false;
+		info->prot_exec = protections & 0x20 ? true : false;
+		mb();
+		atomic_set(&adev->mc.vm_fault_info_updated, 1);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ed6b88f..a58d93d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -975,7 +975,6 @@ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev,
 				     u32 status, u32 addr, u32 mc_client)
 {
 	u32 mc_id;
-	struct kfd_vm_fault_info *info = adev->mc.vm_fault_info;
 	u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
 	u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 					PROTECTIONS);
@@ -990,19 +989,6 @@ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev,
 	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			     MEMORY_CLIENT_RW) ?
 	       "write" : "read", block, mc_client, mc_id);
-
-	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
-		&& !atomic_read(&adev->mc.vm_fault_info_updated)) {
-		info->vmid = vmid;
-		info->mc_id = mc_id;
-		info->page_addr = addr;
-		info->prot_valid = protections & 0x7 ? true : false;
-		info->prot_read = protections & 0x8 ? true : false;
-		info->prot_write = protections & 0x10 ? true : false;
-		info->prot_exec = protections & 0x20 ? true : false;
-		mb();
-		atomic_set(&adev->mc.vm_fault_info_updated, 1);
-	}
 }
 
 static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type)
@@ -1408,7 +1394,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
-	u32 addr, status, mc_client;
+	u32 addr, status, mc_client, vmid;
 
 	if (amdgpu_sriov_vf(adev)) {
 		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
@@ -1439,6 +1425,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
 		gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client);
 	}
 
+	vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+			     VMID);
+	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+		&& !atomic_read(&adev->mc.vm_fault_info_updated)) {
+		struct kfd_vm_fault_info *info = adev->mc.vm_fault_info;
+		u32 protections = REG_GET_FIELD(status,
+					VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+					PROTECTIONS);
+
+		info->vmid = vmid;
+		info->mc_id = REG_GET_FIELD(status,
+					    VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+					    MEMORY_CLIENT_ID);
+		info->status = status;
+		info->page_addr = addr;
+		info->prot_valid = protections & 0x7 ? true : false;
+		info->prot_read = protections & 0x8 ? true : false;
+		info->prot_write = protections & 0x10 ? true : false;
+		info->prot_exec = protections & 0x20 ? true : false;
+		mb();
+		atomic_set(&adev->mc.vm_fault_info_updated, 1);
+	}
+
 	return 0;
 }
 
-- 
2.7.4