From 66e789104fc2c3b9683c1128ca4f369766ce3e70 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 21 Sep 2017 20:01:45 -0400 Subject: [PATCH 2106/4131] drm/amdgpu: Save KFD VM fault info outside of printk_ratelimit The printk_ratelimit is meant to avoid flooding the log. But it should not prevent saving of fault info for KFD. This fixes intermittent bugs when running VM fault tests with --gtest_repeat. Change-Id: Id077aa78d08ab533b812a78f9fa124ecb1d73c05 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 39 +++++++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 39 +++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 322edfe..0cc140a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -755,7 +755,6 @@ static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client) { u32 mc_id; - struct kfd_vm_fault_info *info = adev->mc.vm_fault_info; u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, PROTECTIONS); @@ -770,19 +769,6 @@ static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, MEMORY_CLIENT_RW) ? "write" : "read", block, mc_client, mc_id); - - if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->mc.vm_fault_info_updated)) { - info->vmid = vmid; - info->mc_id = mc_id; - info->page_addr = addr; - info->prot_valid = protections & 0x7 ? true : false; - info->prot_read = protections & 0x8 ? true : false; - info->prot_write = protections & 0x10 ? true : false; - info->prot_exec = protections & 0x20 ? 
true : false; - mb(); - atomic_set(&adev->mc.vm_fault_info_updated, 1); - } } @@ -1261,7 +1247,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); @@ -1285,6 +1271,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->mc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->mc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? 
true : false; + mb(); + atomic_set(&adev->mc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ed6b88f..a58d93d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -975,7 +975,6 @@ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client) { u32 mc_id; - struct kfd_vm_fault_info *info = adev->mc.vm_fault_info; u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, PROTECTIONS); @@ -990,19 +989,6 @@ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, MEMORY_CLIENT_RW) ? "write" : "read", block, mc_client, mc_id); - - if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->mc.vm_fault_info_updated)) { - info->vmid = vmid; - info->mc_id = mc_id; - info->page_addr = addr; - info->prot_valid = protections & 0x7 ? true : false; - info->prot_read = protections & 0x8 ? true : false; - info->prot_write = protections & 0x10 ? true : false; - info->prot_exec = protections & 0x20 ? 
true : false; - mb(); - atomic_set(&adev->mc.vm_fault_info_updated, 1); - } } static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type) @@ -1408,7 +1394,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; if (amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", @@ -1439,6 +1425,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->mc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->mc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? true : false; + mb(); + atomic_set(&adev->mc.vm_fault_info_updated, 1); + } + return 0; } -- 2.7.4