From d9c13156a628cc9f8f062f2c10e2bff55b92aaab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 28 Sep 2015 12:31:26 +0200 Subject: [PATCH 0610/1565] drm/amdgpu: add option to stop on VM fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 40 ++++++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 36 +++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index afc9848..a3dbbd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -79,6 +79,7 @@ extern int amdgpu_bapm; extern int amdgpu_deep_color; extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; +extern int amdgpu_vm_fault_stop; extern int amdgpu_enable_scheduler; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; @@ -960,6 +961,11 @@ struct amdgpu_ring { #define AMDGPU_PTE_FRAG_64KB (4 << 7) #define AMDGPU_LOG2_PAGES_PER_FRAG 4 +/* How to programm VM fault handling */ +#define AMDGPU_VM_FAULT_STOP_NEVER 0 +#define AMDGPU_VM_FAULT_STOP_FIRST 1 +#define AMDGPU_VM_FAULT_STOP_ALWAYS 2 + struct amdgpu_vm_pt { struct amdgpu_bo *bo; uint64_t addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c183772..bec0916 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -75,6 +75,7 @@ int amdgpu_bapm = -1; int amdgpu_deep_color = 0; int amdgpu_vm_size = 8; int amdgpu_vm_block_size = -1; +int amdgpu_vm_fault_stop = 0; int amdgpu_exp_hw_support = 0; int amdgpu_enable_scheduler = 1; int amdgpu_sched_jobs = 16; @@ -141,6 +142,9 @@ module_param_named(vm_size, amdgpu_vm_size, int, 0444); MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); +MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)"); +module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); + MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index fab5471..4883482 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -436,6 +436,33 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, } /** + * gmc_v8_0_set_fault_enable_default - update VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void gmc_v7_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + tmp = RREG32(mmVM_CONTEXT1_CNTL); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + WREG32(mmVM_CONTEXT1_CNTL, tmp); +} + +/** * gmc_v7_0_gart_enable - gart enable * * @adev: amdgpu_device pointer @@ -523,15 +550,13 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) tmp = RREG32(mmVM_CONTEXT1_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, amdgpu_vm_block_size - 9); WREG32(mmVM_CONTEXT1_CNTL, tmp); + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + gmc_v7_0_set_fault_enable_default(adev, false); + else + gmc_v7_0_set_fault_enable_default(adev, true); if (adev->asic_type == CHIP_KAVERI) { tmp = RREG32(mmCHUB_CONTROL); @@ -1268,6 +1293,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, if (!addr && !status) return 0; + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST) + gmc_v7_0_set_fault_enable_default(adev, false); + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", entry->src_id, entry->src_data); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7bc9e9f..42b5ff8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -550,6 +550,35 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, } /** + * gmc_v8_0_set_fault_enable_default - update VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + tmp = RREG32(mmVM_CONTEXT1_CNTL); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + WREG32(mmVM_CONTEXT1_CNTL, tmp); +} + +/** * gmc_v8_0_gart_enable - gart enable * * @adev: amdgpu_device pointer @@ -663,6 +692,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, amdgpu_vm_block_size - 9); WREG32(mmVM_CONTEXT1_CNTL, tmp); + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + gmc_v8_0_set_fault_enable_default(adev, false); + else + gmc_v8_0_set_fault_enable_default(adev, true); gmc_v8_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", @@ -1268,6 +1301,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, if (!addr && !status) return 0; + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST) + gmc_v8_0_set_fault_enable_default(adev, false); + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", entry->src_id, entry->src_data); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", -- 1.9.1