From ac8cec0b9f0e9f9d0e551845c6f80e0b87873074 Mon Sep 17 00:00:00 2001
From: Kalyan Alle
Date: Fri, 20 Apr 2018 13:59:10 +0530
Subject: [PATCH 1867/4131] fixing compilation issues V1

This patch fixes the compilation issues encountered while porting,
covering changes up to commit b45016c9a2b6563666bb90adfc982a92b18678cc
from branch amd-18.10, repo brahma/ec/linux.

Signed-off-by: kalyan alle
---
 drivers/gpu/drm/Kconfig | 3 +
 drivers/gpu/drm/Makefile | 1 +
 drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 38 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 76 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 75 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 27 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 83 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 -
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 -
 drivers/gpu/drm/amd/amdgpu/si_dma.c | 2 -
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_ipc.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 25 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 34 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 13 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h | 5 +
 .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 39 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.h | 10 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_services.c | 3 -
 drivers/gpu/drm/amd/display/dc/dm_services.h | 4 -
 drivers/gpu/drm/amd/lib/Makefile | 12 +-
 drivers/gpu/drm/amd/lib/backport/Makefile | 5 +
 drivers/gpu/drm/amd/lib/backport/backport.h | 6 +
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 7 +-
 .../amd/powerplay/hwmgr/process_pptables_v1_0.c | 4 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c | 3 -
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 15 +-
 drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h | 2 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 23 +-
 drivers/gpu/drm/drm_vblank.c | 1 +
 drivers/gpu/drm/radeon/cik_reg.h | 11 +
 drivers/gpu/drm/radeon/cikd.h | 10 +
 drivers/gpu/drm/radeon/radeon.h | 12 +
 drivers/gpu/drm/radeon/radeon_kfd.c | 890 ++++++++++++++++++++-
 include/drm/drm_fb_helper.h | 31 +
 include/drm/drm_vblank.h | 1 +
 include/uapi/drm/amdgpu_drm.h | 2 +
 61 files changed, 1256 insertions(+), 377 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/lib/backport/Makefile
 create mode 100644 drivers/gpu/drm/amd/lib/backport/backport.h

diff --git a/drivers/gpu/drm/Kconfig
b/drivers/gpu/drm/Kconfig index 83cb2a8..e36bb0d 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -164,6 +164,7 @@ config DRM_RADEON select BACKLIGHT_CLASS_DEVICE select BACKLIGHT_LCD_SUPPORT select INTERVAL_TREE + select CHASH help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. You don't need to choose this to @@ -191,6 +192,8 @@ config DRM_AMDGPU source "drivers/gpu/drm/amd/amdgpu/Kconfig" +source "drivers/gpu/drm/amd/lib/Kconfig" + source "drivers/gpu/drm/nouveau/Kconfig" source "drivers/gpu/drm/i915/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 8ce0703..0105e9f 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_DRM_ARM) += arm/ obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ +obj-y += amd/lib/ obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 81423ee..59e624e 100755 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -36,7 +36,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ - ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ + ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5c6452c..ad419cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -24,6 +24,7 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt #include +#include #include #include #include @@ -1436,7 +1437,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, INIT_LIST_HEAD(&info->userptr_inval_list); info->eviction_fence = - amdgpu_amdkfd_fence_create(kcl_fence_context_alloc(1), + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), current->mm); if (info->eviction_fence == NULL) { pr_err("Failed to create eviction fence\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index cc4e18d..274d031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1778,23 +1778,29 @@ bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev) void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u32 src_tmp[5], dst_tmp[5]; - int i; - u8 align_num_bytes = ALIGN(num_bytes, 4); - - if (to_le) { - memcpy(src_tmp, src, num_bytes); - for (i = 0; i < align_num_bytes / 4; i++) - dst_tmp[i] = cpu_to_le32(src_tmp[i]); - memcpy(dst, dst_tmp, align_num_bytes); - } else { - memcpy(src_tmp, src, align_num_bytes); - for (i = 0; i < align_num_bytes / 4; i++) - dst_tmp[i] = le32_to_cpu(src_tmp[i]); - memcpy(dst, dst_tmp, num_bytes); - } + u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ + u32 *dst32, *src32; + int i; + + memcpy(src_tmp, src, num_bytes); + src32 = (u32 *)src_tmp; + dst32 = (u32 *)dst_tmp; + if (to_le) { + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = cpu_to_le32(src32[i]); + memcpy(dst, dst_tmp, num_bytes); + } else { + u8 dws = 
num_bytes & ~3; + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = le32_to_cpu(src32[i]); + memcpy(dst, dst_tmp, dws); + if (num_bytes % 4) { + for (i = 0; i < (num_bytes % 4); i++) + dst[dws+i] = dst_tmp[dws+i]; + } + } #else - memcpy(dst, src, num_bytes); + memcpy(dst, src, num_bytes); #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1ae5ae8..cde9c3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -7,6 +7,7 @@ * ATPX support for both Intel/ATI */ #include +#include #include #include #include @@ -14,16 +15,6 @@ #include "amd_acpi.h" -#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0) - -struct amdgpu_px_quirk { - u32 chip_vendor; - u32 chip_device; - u32 subsys_vendor; - u32 subsys_device; - u32 px_quirk_flags; -}; - struct amdgpu_atpx_functions { bool px_params; bool power_cntl; @@ -44,8 +35,9 @@ struct amdgpu_atpx { static struct amdgpu_atpx_priv { bool atpx_detected; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) bool bridge_pm_usable; - unsigned int quirks; +#endif /* handle for device - and atpx */ acpi_handle dhandle; acpi_handle other_handle; @@ -216,19 +208,20 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { - if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) { - printk("ATPX Hybrid Graphics, forcing to ATPX\n"); - atpx->functions.power_cntl = true; - atpx->is_hybrid = false; - } else { - printk("ATPX Hybrid Graphics\n"); + printk("ATPX Hybrid Graphics\n"); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) /* * Disable legacy PM methods only when pcie port PM is usable, * otherwise the device might fail to power off or power on. */ atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable; - atpx->is_hybrid = true; - } +#else + /* + * This is a temporary hack for kernels that don't support D3.
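The amdgpu_atombios_copy_swap() rework above only changes behavior on big-endian hosts, where ATOM table data must be converted dword-by-dword and any 1-3 byte tail copied as-is. As a rough, self-contained userspace sketch of that tail handling (the function names and the swap32() stand-in for the kernel's le32 helpers are hypothetical; buffers are capped at the same 20 bytes as the patch's bounce buffers):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for cpu_to_le32()/le32_to_cpu(); identity on little-endian. */
static uint32_t swap32(uint32_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap32(v);
#else
        return v;
#endif
}

/* Swap up to 20 bytes dword-wise, copying the 1-3 byte tail
 * individually, mirroring the !to_le branch of the patched helper. */
static void copy_swap(uint8_t *dst, const uint8_t *src, uint8_t num_bytes)
{
        uint32_t src32[5] = { 0 }, dst32[5];
        uint8_t dst_tmp[20];
        unsigned int dws = num_bytes & ~3u;     /* bytes in whole dwords */
        unsigned int i;

        memcpy(src32, src, num_bytes);          /* bounce through aligned storage */
        for (i = 0; i < (num_bytes + 3u) / 4; i++)
                dst32[i] = swap32(src32[i]);
        memcpy(dst_tmp, dst32, sizeof(dst_tmp));

        memcpy(dst, dst_tmp, dws);              /* whole dwords first */
        for (i = 0; i < (num_bytes & 3u); i++)  /* then the leftover tail */
                dst[dws + i] = dst_tmp[dws + i];
}

int main(void)
{
        uint8_t in[6] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 }, out[6];

        copy_swap(out, in, sizeof(in));
        for (unsigned int i = 0; i < sizeof(out); i++)
                printf("%02x ", out[i]);
        printf("\n");
        return 0;
}

On a little-endian build the swaps are identity operations, so the demo prints the input back; the point is the dws/tail split, which never writes past num_bytes in dst.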
+ */ + atpx->functions.power_cntl = true; +#endif + atpx->is_hybrid = true; } atpx->dgpu_req_power_for_displays = false; @@ -557,39 +550,16 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev) else return VGA_SWITCHEROO_DIS; } - +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static struct vga_switcheroo_handler amdgpu_atpx_handler = { +#else static const struct vga_switcheroo_handler amdgpu_atpx_handler = { +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) */ .switchto = amdgpu_atpx_switchto, .power_state = amdgpu_atpx_power_state, .get_client_id = amdgpu_atpx_get_client_id, }; -static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { - /* HG _PR3 doesn't seem to work on this A+A weston board */ - { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, - { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, - { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, - { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX }, - { 0, 0, 0, 0, 0 }, -}; - -static void amdgpu_atpx_get_quirks(struct pci_dev *pdev) -{ - const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list; - - /* Apply PX quirks */ - while (p && p->chip_device != 0) { - if (pdev->vendor == p->chip_vendor && - pdev->device == p->chip_device && - pdev->subsystem_vendor == p->subsys_vendor && - pdev->subsystem_device == p->subsys_device) { - amdgpu_atpx_priv.quirks |= p->px_quirk_flags; - break; - } - ++p; - } -} - /** * amdgpu_atpx_detect - detect whether we have PX * @@ -603,17 +573,20 @@ static bool amdgpu_atpx_detect(void) struct pci_dev *pdev = NULL; bool has_atpx = false; int vga_count = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) bool d3_supported = false; struct pci_dev *parent_pdev; +#endif while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { vga_count++; has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; - amdgpu_atpx_get_quirks(pdev); +#endif } while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { @@ -621,9 +594,10 @@ static bool amdgpu_atpx_detect(void) has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; - amdgpu_atpx_get_quirks(pdev); +#endif } if (has_atpx && vga_count == 2) { @@ -631,7 +605,9 @@ static bool amdgpu_atpx_detect(void) pr_info("vga_switcheroo: detected switching method %s handle\n", acpi_method_name); amdgpu_atpx_priv.atpx_detected = true; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) amdgpu_atpx_priv.bridge_pm_usable = d3_supported; +#endif amdgpu_atpx_init(); return true; } @@ -646,7 +622,11 @@ static bool amdgpu_atpx_detect(void) void amdgpu_register_atpx_handler(void) { bool r; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + int handler_flags = 0; +#else enum vga_switcheroo_handler_flags_t handler_flags = 0; +#endif /* detect if we have any ATPX + 2 VGA in the system */ r = amdgpu_atpx_detect(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 057e1ec..b0b01c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -58,6 +58,12 @@ static bool check_atom_bios(uint8_t *bios, size_t size) DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]); return false; } + + tmp = 
bios[0x18] | (bios[0x19] << 8); + if (bios[tmp + 0x14] != 0x0) { + DRM_INFO("Not an x86 BIOS ROM\n"); + return false; + } bios_header_start = bios[0x48] | (bios[0x49] << 8); if (!bios_header_start) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6c8eca5..8d0ad30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -27,9 +27,7 @@ #include #include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) #include -#endif #include "amdgpu.h" #include "amdgpu_trace.h" @@ -40,7 +38,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_gem_object *gobj; unsigned long size; - gobj = kcl_drm_gem_object_lookup(p->adev->ddev, p->filp, data->handle); + gobj = drm_gem_object_lookup(p->filp, data->handle); if (gobj == NULL) return -EINVAL; @@ -56,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, *offset = data->offset; - kcl_drm_gem_object_put_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { amdgpu_bo_unref(&p->uf_entry.robj); @@ -92,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } /* get chunks */ - chunk_array_user = kcl_u64_to_user_ptr(cs->in.chunks); + chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; @@ -112,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; - chunk_ptr = kcl_u64_to_user_ptr(chunk_array[i]); + chunk_ptr = u64_to_user_ptr(chunk_array[i]); if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { ret = -EFAULT; @@ -123,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = kcl_u64_to_user_ptr(user_chunk.chunk_data); + cdata = u64_to_user_ptr(user_chunk.chunk_data); #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); @@ -491,9 +489,15 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, /* Check if we have user pages and nobody bound the BO already */ if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && lobj->user_pages) { + amdgpu_ttm_placement_from_domain(bo, + AMDGPU_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, + false); + if (r) + return r; amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); - binding_userptr = true; + binding_userptr = true; } if (p->evictable == lobj) @@ -851,7 +855,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } - r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync); + r = amdgpu_vm_handle_moved(adev, vm); if (r) return r; @@ -1327,7 +1331,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (IS_ERR(fence)) r = PTR_ERR(fence); else if (fence) { - r = kcl_fence_wait_timeout(fence, true, timeout); + r = dma_fence_wait_timeout(fence, true, timeout); dma_fence_put(fence); } else r = 1; @@ -1402,7 +1406,7 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, else if (!fence) continue; - r = kcl_fence_wait_timeout(fence, true, timeout); + r = dma_fence_wait_timeout(fence, true, timeout); dma_fence_put(fence); if (r < 0) return r; @@ -1454,11 +1458,12 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, 
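The ROM check added to check_atom_bios() above relies on the standard PCI option-ROM layout: the little-endian word at offset 0x18 of the image points to the PCI Data Structure, and the byte at offset 0x14 inside that structure is the code type, where 0x00 means x86/PC-AT executable code (0x03 would be EFI). A hedged standalone sketch of the same test, with an explicit bounds check added for illustration (the kernel function validates the image size separately):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool rom_is_x86(const uint8_t *bios, size_t size)
{
        uint16_t pcir;

        if (size < 0x1a || bios[0] != 0x55 || bios[1] != 0xaa)
                return false;                   /* missing 0x55AA ROM signature */

        pcir = bios[0x18] | (bios[0x19] << 8);  /* PCI Data Structure offset */
        if ((size_t)pcir + 0x15 > size)
                return false;                   /* pointer past end of image */

        return bios[pcir + 0x14] == 0x00;       /* code type 0 = x86/PC-AT */
}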
array[i] = fence; } else { /* NULL, the fence has been already signaled */ r = 1; + first = i; goto out; } } - r = kcl_fence_wait_any_timeout(array, fence_count, true, timeout, + r = dma_fence_wait_any_timeout(array, fence_count, true, timeout, &first); if (r < 0) goto err_free_fence_array; @@ -1504,7 +1509,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, if (fences == NULL) return -ENOMEM; - fences_user = kcl_u64_to_user_ptr(wait->in.fences); + fences_user = u64_to_user_ptr(wait->in.fences); if (copy_from_user(fences, fences_user, sizeof(struct drm_amdgpu_fence) * fence_count)) { r = -EFAULT; @@ -1537,46 +1542,24 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo, struct amdgpu_bo_va_mapping **map) { + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - unsigned i; int r; - if (!parser->bo_list) - return 0; - addr /= AMDGPU_GPU_PAGE_SIZE; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); + if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) + return -EINVAL; - for (i = 0; i < parser->bo_list->num_entries; i++) { - struct amdgpu_bo_list_entry *lobj; - - lobj = &parser->bo_list->array[i]; - if (!lobj->bo_va || amdgpu_ttm_adev(lobj->bo_va->base.bo->tbo.bdev) != parser->adev) - continue; - - list_for_each_entry(mapping, &lobj->bo_va->valids, list) { - if (mapping->start > addr || - addr > mapping->last) - continue; - - *bo = lobj->bo_va->base.bo; - *map = mapping; - goto found; - } - - list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { - if (mapping->start > addr || - addr > mapping->last) - continue; - - *bo = lobj->bo_va->base.bo; - *map = mapping; - goto found; - } - } + *bo = mapping->bo_va->base.bo; + *map = mapping; - return -EINVAL; + /* Double check that the BO is reserved by this CS */ + if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) + return -EINVAL; -found: r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); if (unlikely(r)) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 51d5fe4d..ce1f1ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -404,6 +404,15 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev) */ static int amdgpu_doorbell_init(struct amdgpu_device *adev) { + /* No doorbell on SI hardware generation */ + if (adev->asic_type < CHIP_BONAIRE) { + adev->doorbell.base = 0; + adev->doorbell.size = 0; + adev->doorbell.num_doorbells = 0; + adev->doorbell.ptr = NULL; + return 0; + } + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); @@ -2144,9 +2153,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - if (adev->asic_type >= CHIP_BONAIRE) - /* doorbell bar mapping */ - amdgpu_doorbell_init(adev); + /* doorbell bar mapping */ + amdgpu_doorbell_init(adev); /* io port mapping */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { @@ -2375,8 +2383,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rio_mem = NULL; iounmap(adev->rmmio); adev->rmmio = NULL; - if (adev->asic_type >= CHIP_BONAIRE) - amdgpu_doorbell_fini(adev); + + amdgpu_doorbell_fini(adev); amdgpu_debugfs_regs_cleanup(adev); } diff --git 
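The rewritten amdgpu_cs_find_mapping() above swaps the linear search over the CS buffer list for a single VM interval-tree lookup, then confirms the resulting BO really is reserved by this command submission before using it. The ownership test is a racy-but-sufficient peek at the ww_mutex acquire context; a kernel-context sketch of that check (illustrative only, against the 4.14-era reservation_object):

#include <linux/compiler.h>
#include <linux/reservation.h>
#include <linux/ww_mutex.h>

/* True only while 'ticket' (this CS's acquire context) currently holds
 * the BO's reservation lock; a BO reachable through the VM but not
 * reserved by this CS must be rejected, since its backing store could
 * move underneath the submission. */
static bool reserved_by_ticket(struct reservation_object *resv,
                               struct ww_acquire_ctx *ticket)
{
        return READ_ONCE(resv->lock.ctx) == ticket;
}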
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d84e352..cad11ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -71,7 +71,7 @@ * - 3.19.0 - Add support for UVD MJPEG decode */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 19 +#define KMS_DRIVER_MINOR 20 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -821,6 +821,9 @@ static struct drm_driver kms_driver = { .lastclose = amdgpu_driver_lastclose_kms, .set_busid = drm_pci_set_busid, .unload = amdgpu_driver_unload_kms, + .get_vblank_counter = amdgpu_get_vblank_counter_kms, + .enable_vblank = amdgpu_enable_vblank_kms, + .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, .get_scanout_position = amdgpu_get_crtc_scanout_position, #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 90fa8e8..8c0bea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -240,6 +240,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); + info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &amdgpufb_ops; tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; @@ -306,8 +307,32 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb return 0; } +/** Sets the color ramps on behalf of fbcon */ +static void amdgpu_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, + u16 blue, int regno) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + + amdgpu_crtc->lut_r[regno] = red >> 6; + amdgpu_crtc->lut_g[regno] = green >> 6; + amdgpu_crtc->lut_b[regno] = blue >> 6; +} + +/** Gets the color ramps on behalf of fbcon */ +static void amdgpu_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, + u16 *blue, int regno) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + + *red = amdgpu_crtc->lut_r[regno] << 6; + *green = amdgpu_crtc->lut_g[regno] << 6; + *blue = amdgpu_crtc->lut_b[regno] << 6; +} + static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = { - .fb_probe = amdgpufb_create, + .gamma_set = amdgpu_crtc_fb_gamma_set, + .gamma_get = amdgpu_crtc_fb_gamma_get, + .fb_probe = amdgpufb_create, }; int amdgpu_fbdev_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 19631d0..bbc3d1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -436,7 +436,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (r) - goto release_object; + goto unlock_mmap_sem; r = amdgpu_bo_reserve(bo, true); if (r) @@ -461,6 +461,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, free_pages: release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false); +unlock_mmap_sem: + up_read(¤t->mm->mmap_sem); + release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 48d94ae..5c13572 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -109,26 +109,9 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s } } -static bool amdgpu_gfx_is_multipipe_capable(struct 
amdgpu_device *adev) -{ - if (amdgpu_compute_multipipe != -1) { - DRM_INFO("amdgpu: forcing compute pipe policy %d\n", - amdgpu_compute_multipipe); - return amdgpu_compute_multipipe == 1; - } - - /* FIXME: spreading the queues across pipes causes perf regressions - * on POLARIS11 compute workloads */ - if (adev->asic_type == CHIP_POLARIS11) - return false; - - return adev->gfx.mec.num_mec > 1; -} - void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe, mec; - bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); /* policy for amdgpu compute queue ownership */ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { @@ -141,8 +124,9 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) /* we've run out of HW */ if (mec >= adev->gfx.mec.num_mec) break; - - if (multipipe_policy) { + + /* FIXME: spreading the queues across pipes causes perf regressions */ + if (0) { /* policy: amdgpu owns the first two queues of the first MEC */ if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 47c5ce9..519b156 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -276,7 +276,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) void amdgpu_irq_fini(struct amdgpu_device *adev) { unsigned i, j; - + + drm_vblank_cleanup(adev->ddev); if (adev->irq.installed) { drm_irq_uninstall(adev->ddev); adev->irq.installed = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index c870664..6c79b6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -53,7 +53,7 @@ struct amdgpu_mn { /* objects protected by lock */ struct rw_semaphore lock; - struct rb_root objects; + struct rb_root_cached objects; struct mutex read_lock; atomic_t recursion; }; @@ -80,8 +80,8 @@ static void amdgpu_mn_destroy(struct work_struct *work) mutex_lock(&adev->mn_lock); down_write(&rmn->lock); hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, - it.rb) { + rbtree_postorder_for_each_entry_safe(node, next_node, + &rmn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); @@ -177,7 +177,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) continue; - r = kcl_reservation_object_wait_timeout_rcu(bo->tbo.resv, + r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); @@ -350,7 +350,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, rmn->type = type; rmn->mn.ops = &amdgpu_mn_ops[type]; init_rwsem(&rmn->lock); - rmn->objects = RB_ROOT; + rmn->objects = RB_ROOT_CACHED; mutex_init(&rmn->read_lock); atomic_set(&rmn->recursion, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 7aa7b6c..8ba6f62 100755 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -70,11 +70,12 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) return false; } -static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, - struct ttm_placement *placement, - struct ttm_place *places, - u32 domain, u64 flags) +void amdgpu_ttm_placement_from_domain(struct 
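Several conversions in this patch (amdgpu_mn.objects here, vm->va and the KFD bo_interval_tree later) follow the 4.14 rework that gave interval trees a cached leftmost node: the root type becomes rb_root_cached, initialization uses RB_ROOT_CACHED, and postorder teardown walks the embedded rb_root. A minimal kernel-context sketch of the migration for a hypothetical owner struct:

#include <linux/rbtree.h>

struct example_node {
        struct rb_node it;
};

struct example_owner {
        struct rb_root_cached objects;          /* was: struct rb_root */
};

static void example_init(struct example_owner *o)
{
        o->objects = RB_ROOT_CACHED;            /* was: RB_ROOT */
}

static void example_teardown(struct example_owner *o)
{
        struct example_node *node, *next;

        /* postorder helpers still take a plain rb_root */
        rbtree_postorder_for_each_entry_safe(node, next,
                                             &o->objects.rb_root, it) {
                /* release node here */
        }
}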
amdgpu_bo *abo, u32 domain) { + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + struct ttm_placement *placement = &abo->placement; + struct ttm_place *places = abo->placements; + u64 flags = abo->flags; u32 c = 0, i; if ((domain & AMDGPU_GEM_DOMAIN_DGMA) && amdgpu_direct_gma_size) { @@ -177,27 +178,6 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, placement->busy_placement = places; } -void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); - - amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements, - domain, abo->flags); -} - -static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, - struct ttm_placement *placement) -{ - BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1)); - - memcpy(bo->placements, placement->placement, - placement->num_placement * sizeof(struct ttm_place)); - bo->placement.num_placement = placement->num_placement; - bo->placement.num_busy_placement = placement->num_busy_placement; - bo->placement.placement = bo->placements; - bo->placement.busy_placement = bo->placements; -} - /** * amdgpu_bo_create_reserved - create reserved BO for kernel use * @@ -329,14 +309,13 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -int amdgpu_bo_create_restricted(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, - struct ttm_placement *placement, - struct reservation_object *resv, - uint64_t init_value, - struct amdgpu_bo **bo_ptr) +static int amdgpu_bo_do_create(struct amdgpu_device *adev, + unsigned long size, int byte_align, + bool kernel, u32 domain, u64 flags, + struct sg_table *sg, + struct reservation_object *resv, + uint64_t init_value, + struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; enum ttm_bo_type type; @@ -409,10 +388,11 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; #endif - amdgpu_fill_placement_to_bo(bo, placement); - /* Kernel allocation are uninterruptible */ + bo->tbo.bdev = &adev->mman.bdev; + amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); + /* Kernel allocation are uninterruptible */ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); @@ -483,27 +463,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, unsigned long size, int byte_align, struct amdgpu_bo *bo) { - struct ttm_placement placement = {0}; - struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; int r; if (bo->shadow) return 0; - memset(&placements, 0, sizeof(placements)); - amdgpu_ttm_placement_init(adev, &placement, placements, - AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW); - - r = amdgpu_bo_create_restricted(adev, size, byte_align, true, - AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW, - NULL, &placement, - bo->tbo.resv, - 0, - &bo->shadow); + r = amdgpu_bo_do_create(adev, size, byte_align, true, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW, + NULL, bo->tbo.resv, 0, + &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -525,18 +495,11 @@ int amdgpu_bo_create(struct amdgpu_device *adev, uint64_t init_value, struct amdgpu_bo **bo_ptr) { - 
struct ttm_placement placement = {0}; - struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - memset(&placements, 0, sizeof(placements)); - amdgpu_ttm_placement_init(adev, &placement, placements, - domain, parent_flags); - - r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, - parent_flags, sg, &placement, resv, - init_value, bo_ptr); + r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, + parent_flags, sg, resv, init_value, bo_ptr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 4ec76e8..abf207e 100755 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -35,6 +35,7 @@ /* bo virtual addresses in a vm */ struct amdgpu_bo_va_mapping { + struct amdgpu_bo_va *bo_va; struct list_head list; struct rb_node rb; uint64_t start; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index b293380..befc09b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -63,7 +63,7 @@ static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, static int amdgpu_identity_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - u32 ring, + int ring, struct amdgpu_ring **out_ring) { switch (mapper->hw_ip) { @@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) static int amdgpu_lru_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - u32 user_ring, + int user_ring, struct amdgpu_ring **out_ring) { int r, i, j; @@ -208,7 +208,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, */ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - u32 hw_ip, u32 instance, u32 ring, + int hw_ip, int instance, int ring, struct amdgpu_ring **out_ring) { int r, ip_num_rings; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fd5a641..c957155 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -25,14 +25,8 @@ * Alex Deucher * Jerome Glisse */ -#if defined(BUILD_AS_DKMS) -#include -#else #include -#endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) #include -#endif #include #include #include @@ -1821,7 +1815,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, dma_addr_t *pages_addr = NULL; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; - struct dma_fence *exclusive; + struct dma_fence *exclusive, **last_update; uint64_t flags; int r; @@ -1849,6 +1843,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, else flags = 0x0; + if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) + last_update = &vm->last_update; + else + last_update = &bo_va->last_pt_update; + if (!clear && bo_va->base.moved) { bo_va->base.moved = false; list_splice_init(&bo_va->valids, &bo_va->invalids); @@ -1860,7 +1859,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, list_for_each_entry(mapping, &bo_va->invalids, list) { r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, mapping, flags, mem, - &bo_va->last_pt_update); + last_update); if (r) return r; } @@ -1883,12 +1882,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, trace_amdgpu_vm_bo_mapping(mapping); } - if (bo_va->base.bo && - bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) { - dma_fence_put(vm->last_update); 
- vm->last_update = dma_fence_get(bo_va->last_pt_update); - } - return 0; } @@ -2086,15 +2079,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_sync *sync) + struct amdgpu_vm *vm) { - struct amdgpu_bo_va *bo_va = NULL; bool clear; int r = 0; spin_lock(&vm->status_lock); while (!list_empty(&vm->moved)) { + struct amdgpu_bo_va *bo_va; + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); @@ -2110,9 +2103,6 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } spin_unlock(&vm->status_lock); - if (bo_va) - r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update); - return r; } @@ -2171,6 +2161,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo *bo = bo_va->base.bo; + mapping->bo_va = bo_va; list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); @@ -2348,6 +2339,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); + mapping->bo_va = NULL; trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (valid) @@ -2433,6 +2425,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, if (tmp->last > eaddr) tmp->last = eaddr; + tmp->bo_va = NULL; list_add(&tmp->list, &vm->freed); trace_amdgpu_vm_bo_unmap(NULL, tmp); } @@ -2459,6 +2452,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, } /** + * amdgpu_vm_bo_lookup_mapping - find mapping by address + * + * @vm: the requested VM + * + * Find a mapping by its address. + */ +struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, + uint64_t addr) +{ + return amdgpu_vm_it_iter_first(&vm->va, addr, addr); +} + +/** * amdgpu_vm_bo_rmv - remove a bo to a specific vm * * @adev: amdgpu_device pointer @@ -2483,6 +2489,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); + mapping->bo_va = NULL; trace_amdgpu_vm_bo_unmap(bo_va, mapping); list_add(&mapping->list, &vm->freed); } @@ -2619,7 +2626,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u64 flags; uint64_t init_pde_value = 0; - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; @@ -2811,10 +2818,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amd_sched_entity_fini(vm->entity.sched, &vm->entity); - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va.rb_root, rb) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); @@ -2862,7 +2869,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) } } - adev->vm_manager.fence_context = kcl_fence_context_alloc(AMDGPU_MAX_RINGS); + adev->vm_manager.fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 30bdec1..2056f99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
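The new amdgpu_vm_bo_lookup_mapping() above is a point query expressed as a degenerate range query: it asks the VM's interval tree for the first mapping overlapping [addr, addr]. With the generic interval-tree API the same idea looks like this (kernel-context sketch; the driver actually uses its own amdgpu_vm_it_* instantiation of the interval-tree template):

#include <linux/interval_tree.h>

/* Returns the first node whose [start, last] interval contains addr,
 * or NULL if no interval covers that address. */
static struct interval_tree_node *
point_lookup(struct rb_root_cached *root, unsigned long addr)
{
        return interval_tree_iter_first(root, addr, addr);
}

Callers such as the rewritten amdgpu_cs_find_mapping() then validate whatever hangs off the returned mapping (bo_va, backing BO) before using it.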
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -272,8 +272,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_sync *sync); + struct amdgpu_vm *vm); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9737408..dd03819 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -416,7 +416,7 @@ static int gmc_v9_0_late_init(void *handle) /* Engine 16 is used for KFD and 17 for GART flushes */ for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) - BUG_ON(vm_inv_eng[i] > 16); + BUG_ON(vm_inv_eng[i] > 17); return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 4c9f8d8..bafa29c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1324,9 +1324,7 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { - .copy_pte_num_dw = 7, .copy_pte = sdma_v2_4_vm_copy_pte, - .write_pte = sdma_v2_4_vm_write_pte, .set_max_nums_pte_pde = 0x1fffff >> 3, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 10a0ceb..a8a9bac 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1748,9 +1748,7 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, .copy_pte = sdma_v3_0_vm_copy_pte, - .write_pte = sdma_v3_0_vm_write_pte, /* not 0x3fffff due to HW limitation */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 6fa67a8..2d05e43 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1714,9 +1714,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, .copy_pte = sdma_v4_0_vm_copy_pte, - .write_pte = sdma_v4_0_vm_write_pte, .set_max_nums_pte_pde = 0x400000 >> 3, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 3fa2fbf..adb6ae7 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -887,9 +887,7 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { - .copy_pte_num_dw = 5, .copy_pte = si_dma_vm_copy_pte, - .write_pte = si_dma_vm_write_pte, .set_max_nums_pte_pde = 0xffff8 >> 3, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index f565530..09e3fe65 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 4f4392a..274e8dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -41,6 +41,7 @@ */ #include +#include #include #include "kfd_priv.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c index c6be3ba..58aed52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c @@ -21,6 +21,7 @@ */ #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index b96f684..6de9dd3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -22,6 +22,7 @@ */ #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index d603ce9..043a483 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -22,6 +22,7 @@ */ #include +#include #include #include #include "kfd_priv.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index a01e703..581e933 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -22,6 +22,7 @@ */ #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 27a0fa0..91c5380 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -22,6 +22,7 @@ */ #include +#include #include #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 23aaa4b..cceaa89 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -717,7 +717,7 @@ struct kfd_process { size_t signal_event_count; bool signal_event_limit_reached; - struct rb_root bo_interval_tree; + struct rb_root_cached bo_interval_tree; /* Information used for memory eviction */ void *process_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4080ac0..b9f7e9a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -584,7 +584,7 @@ static struct kfd_process *create_process(const struct task_struct *thread, if (!process) goto err_alloc_process; - process->bo_interval_tree = RB_ROOT; + process->bo_interval_tree = RB_ROOT_CACHED; process->pasid = kfd_pasid_alloc(); if (process->pasid == 0) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ffd8e0f..de96561 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index b59b32c..e517d98 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -24,6 +24,7 @@ #define __KFD_TOPOLOGY_H__ #include +#include #include #include "kfd_priv.h" diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 86a1b47..8c466bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -23,8 +23,6 @@ * */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined(OS_NAME_RHEL_7_4) - #include "dm_services_types.h" #include "dc.h" @@ -1939,9 +1937,9 @@ static int 
fill_plane_attributes_from_fb( default: DRM_ERROR("Unsupported screen format %s\n", #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - kcl_drm_get_format_name(fb->pixel_format, &format_name)); + drm_get_format_name(fb->pixel_format, &format_name)); #else - kcl_drm_get_format_name(fb->format->format, &format_name)); + drm_get_format_name(fb->format->format, &format_name)); #endif return -EINVAL; } @@ -3416,33 +3414,33 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, case DRM_PLANE_TYPE_PRIMARY: aplane->base.format_default = true; - res = kcl_drm_universal_plane_init( + res = drm_universal_plane_init( dm->adev->ddev, &aplane->base, possible_crtcs, &dm_plane_funcs, rgb_formats, - ARRAY_SIZE(rgb_formats), + ARRAY_SIZE(rgb_formats), NULL, aplane->base.type, NULL); break; case DRM_PLANE_TYPE_OVERLAY: - res = kcl_drm_universal_plane_init( + res = drm_universal_plane_init( dm->adev->ddev, &aplane->base, possible_crtcs, &dm_plane_funcs, yuv_formats, - ARRAY_SIZE(yuv_formats), + ARRAY_SIZE(yuv_formats), NULL, aplane->base.type, NULL); break; case DRM_PLANE_TYPE_CURSOR: - res = kcl_drm_universal_plane_init( + res = drm_universal_plane_init( dm->adev->ddev, &aplane->base, possible_crtcs, &dm_plane_funcs, cursor_formats, - ARRAY_SIZE(cursor_formats), + ARRAY_SIZE(cursor_formats), NULL, aplane->base.type, NULL); break; } @@ -3472,7 +3470,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, if (!acrtc) goto fail; - res = kcl_drm_crtc_init_with_planes( + res = drm_crtc_init_with_planes( dm->ddev, &acrtc->base, plane, @@ -3917,7 +3915,7 @@ int amdgpu_dm_encoder_init( { struct amdgpu_device *adev = dev->dev_private; - int res = kcl_drm_encoder_init(dev, + int res = drm_encoder_init(dev, &aencoder->base, &amdgpu_dm_encoder_funcs, DRM_MODE_ENCODER_TMDS, @@ -4359,7 +4357,7 @@ void amdgpu_dm_atomic_commit_tail( struct drm_connector_state *old_conn_state; struct dm_crtc_state *old_acrtc_state, *new_acrtc_state; - kcl_drm_atomic_helper_update_legacy_modeset_state(dev, state); + drm_atomic_helper_update_legacy_modeset_state(dev, state); dm_state = to_dm_atomic_state(state); @@ -5194,4 +5192,3 @@ void amdgpu_dm_remove_sink_from_freesync_module( } -#endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 02f7f04..9a61507 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -133,7 +133,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps( } static void get_payload_table( - struct amdgpu_dm_connector *aconnector, + struct amdgpu_connector *aconnector, struct dp_mst_stream_allocation_table *proposed_table) { int i; @@ -178,7 +178,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( struct dp_mst_stream_allocation_table *proposed_table, bool enable) { - struct amdgpu_dm_connector *aconnector; + struct amdgpu_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; int slots = 0; @@ -233,12 +233,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( pbn = drm_dp_calc_pbn_mode(clock, bpp); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) slots = drm_dp_find_vcpi_slots(mst_mgr, pbn); ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port, pbn, slots); -#else - ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port, pbn, &slots); -#endif if (!ret) return false; @@ -270,7 +266,7 @@ bool dm_helpers_dp_mst_poll_for_allocation_change_trigger( struct dc_context *ctx, const struct 
dc_stream_state *stream) { - struct amdgpu_dm_connector *aconnector; + struct amdgpu_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; int ret; @@ -297,7 +293,7 @@ bool dm_helpers_dp_mst_send_payload_allocation( const struct dc_stream_state *stream, bool enable) { - struct amdgpu_dm_connector *aconnector; + struct amdgpu_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; int ret; @@ -345,7 +341,10 @@ bool dm_helpers_dp_mst_start_top_mgr( const struct dc_link *link, bool boot) { - struct amdgpu_dm_connector *aconnector = link->priv; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) || \ + defined(OS_NAME_RHEL_7_3) || \ + defined(OS_NAME_RHEL_7_4) + struct amdgpu_connector *aconnector = link->priv; if (!aconnector) { DRM_ERROR("Failed to found connector for link!"); @@ -362,13 +361,19 @@ bool dm_helpers_dp_mst_start_top_mgr( aconnector, aconnector->base.base.id); return (drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true) == 0); +#else + return false; +#endif } void dm_helpers_dp_mst_stop_top_mgr( struct dc_context *ctx, const struct dc_link *link) { - struct amdgpu_dm_connector *aconnector = link->priv; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) || \ + defined(OS_NAME_RHEL_7_3) || \ + defined(OS_NAME_RHEL_7_4) + struct amdgpu_connector *aconnector = link->priv; if (!aconnector) { DRM_ERROR("Failed to found connector for link!"); @@ -380,6 +385,7 @@ void dm_helpers_dp_mst_stop_top_mgr( if (aconnector->mst_mgr.mst_state == true) drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, false); +#endif } bool dm_helpers_dp_read_dpcd( @@ -390,7 +396,7 @@ bool dm_helpers_dp_read_dpcd( uint32_t size) { - struct amdgpu_dm_connector *aconnector = link->priv; + struct amdgpu_connector *aconnector = link->priv; if (!aconnector) { DRM_ERROR("Failed to found connector for link!"); @@ -408,7 +414,7 @@ bool dm_helpers_dp_write_dpcd( const uint8_t *data, uint32_t size) { - struct amdgpu_dm_connector *aconnector = link->priv; + struct amdgpu_connector *aconnector = link->priv; if (!aconnector) { DRM_ERROR("Failed to found connector for link!"); @@ -424,7 +430,7 @@ bool dm_helpers_submit_i2c( const struct dc_link *link, struct i2c_command *cmd) { - struct amdgpu_dm_connector *aconnector = link->priv; + struct amdgpu_connector *aconnector = link->priv; struct i2c_msg *msgs; int i = 0; int num = cmd->number_of_payloads; @@ -459,7 +465,7 @@ enum dc_edid_status dm_helpers_read_local_edid( struct dc_link *link, struct dc_sink *sink) { - struct amdgpu_dm_connector *aconnector = link->priv; + struct amdgpu_connector *aconnector = link->priv; struct i2c_adapter *ddc; int retry = 3; enum dc_edid_status edid_status; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 276adf9..0df936f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -787,10 +787,10 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_connector *connector; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct amdgpu_dm_connector *amdgpu_dm_connector = - to_amdgpu_dm_connector(connector); + struct amdgpu_connector *amdgpu_connector = + to_amdgpu_connector(connector); - const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; + const struct dc_link *dc_link = amdgpu_connector->dc_link; if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd) { dc_interrupt_set(adev->dm.dc, @@ -820,9 
+820,9 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_connector *connector; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct amdgpu_dm_connector *amdgpu_dm_connector = - to_amdgpu_dm_connector(connector); - const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; + struct amdgpu_connector *amdgpu_connector = + to_amdgpu_connector(connector); + const struct dc_link *dc_link = amdgpu_connector->dc_link; dc_interrupt_set(adev->dm.dc, dc_link->irq_source_hpd, false); @@ -833,3 +833,4 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } } + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h index 9d30076..69cbf3c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_DM_IRQ_H__ #define __AMDGPU_DM_IRQ_H__ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined(OS_NAME_RHEL_7_4) + #include "irq_types.h" /* DAL irq definitions */ /* @@ -119,4 +121,7 @@ int amdgpu_dm_irq_suspend(struct amdgpu_device *adev); int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev); int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev); +#else +#include "../kcl_dm/kcl_dm_irq.h" +#endif /* KERNEL_VERSION */ #endif /* __AMDGPU_DM_IRQ_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index d798a52..68f7608 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -137,8 +137,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg static enum drm_connector_status dm_dp_mst_detect(struct drm_connector *connector, bool force) { - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct amdgpu_dm_connector *master = aconnector->mst_port; + struct amdgpu_connector *aconnector = to_amdgpu_connector(connector); + struct amdgpu_connector *master = aconnector->mst_port; enum drm_connector_status status = drm_dp_mst_detect_port( @@ -152,13 +152,13 @@ dm_dp_mst_detect(struct drm_connector *connector, bool force) static void dm_dp_mst_connector_destroy(struct drm_connector *connector) { - struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); - struct amdgpu_encoder *amdgpu_encoder = amdgpu_dm_connector->mst_encoder; + struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + struct amdgpu_encoder *amdgpu_encoder = amdgpu_connector->mst_encoder; drm_encoder_cleanup(&amdgpu_encoder->base); kfree(amdgpu_encoder); drm_connector_cleanup(connector); - kfree(amdgpu_dm_connector); + kfree(amdgpu_connector); } static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { @@ -176,7 +176,7 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { static int dm_dp_mst_get_modes(struct drm_connector *connector) { - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_connector *aconnector = to_amdgpu_connector(connector); int ret = 0; ret = drm_add_edid_modes(&aconnector->base, aconnector->edid); @@ -188,9 +188,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) static struct drm_encoder *dm_mst_best_encoder(struct drm_connector *connector) { - struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + struct 
amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - return &amdgpu_dm_connector->mst_encoder->base; + return &amdgpu_connector->mst_encoder->base; } static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs = { @@ -200,7 +200,7 @@ static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs }; static struct amdgpu_encoder * -dm_dp_create_fake_mst_encoder(struct amdgpu_dm_connector *connector) +dm_dp_create_fake_mst_encoder(struct amdgpu_connector *connector) { struct drm_device *dev = connector->base.dev; struct amdgpu_device *adev = dev->dev_private; @@ -235,15 +235,15 @@ static struct drm_connector *dm_dp_add_mst_connector(struct drm_dp_mst_topology_ struct drm_dp_mst_port *port, const char *pathprop) { - struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr); + struct amdgpu_connector *master = container_of(mgr, struct amdgpu_connector, mst_mgr); struct drm_device *dev = master->base.dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_dm_connector *aconnector; + struct amdgpu_connector *aconnector; struct drm_connector *connector; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - aconnector = to_amdgpu_dm_connector(connector); + aconnector = to_amdgpu_connector(connector); if (aconnector->mst_port == master && !aconnector->port) { DRM_INFO("DM_MST: reusing connector: %p [id: %d] [master: %p]\n", @@ -317,7 +317,7 @@ static void dm_dp_destroy_mst_connector( struct drm_dp_mst_topology_mgr *mgr, struct drm_connector *connector) { - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_connector *aconnector = to_amdgpu_connector(connector); DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -341,17 +341,17 @@ static void dm_dp_destroy_mst_connector( static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) { - struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr); + struct amdgpu_connector *master = container_of(mgr, struct amdgpu_connector, mst_mgr); struct drm_device *dev = master->base.dev; struct amdgpu_device *adev = dev->dev_private; struct drm_connector *connector; - struct amdgpu_dm_connector *aconnector; + struct amdgpu_connector *aconnector; struct edid *edid; struct dc_sink *dc_sink; drm_modeset_lock_all(dev); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - aconnector = to_amdgpu_dm_connector(connector); + aconnector = to_amdgpu_connector(connector); if (aconnector->port && aconnector->port->pdt != DP_PEER_DEVICE_NONE && aconnector->port->pdt != DP_PEER_DEVICE_MST_BRANCHING && @@ -442,7 +442,7 @@ static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { void amdgpu_dm_initialize_dp_connector( struct amdgpu_display_manager *dm, - struct amdgpu_dm_connector *aconnector) + struct amdgpu_connector *aconnector) { aconnector->dm_dp_aux.aux.name = "dmdc"; aconnector->dm_dp_aux.aux.dev = dm->adev->dev; @@ -453,10 +453,13 @@ void amdgpu_dm_initialize_dp_connector( aconnector->mst_mgr.cbs = &dm_mst_cbs; drm_dp_mst_topology_mgr_init( &aconnector->mst_mgr, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + dm->adev->dev, +#else dm->adev->ddev, +#endif &aconnector->dm_dp_aux.aux, 16, 4, aconnector->connector_id); } - diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index ba64a40..14634bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -26,11 +26,17 @@ #ifndef __DAL_AMDGPU_DM_MST_TYPES_H__ #define __DAL_AMDGPU_DM_MST_TYPES_H__ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined(OS_NAME_RHEL_7_4) + struct amdgpu_display_manager; -struct amdgpu_dm_connector; +struct amdgpu_connector; void amdgpu_dm_initialize_dp_connector( struct amdgpu_display_manager *dm, - struct amdgpu_dm_connector *aconnector); + struct amdgpu_connector *aconnector); + +#else +#include "../kcl_dm/kcl_dm_mst_types.h" +#endif /* KERNEL_VERSION */ #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 257fbdd..3348e90 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -23,8 +23,6 @@ * */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined(OS_NAME_RHEL_7_4) - #include #include @@ -436,4 +434,3 @@ bool dm_pp_get_static_clocks( } /**** end of power component interfaces ****/ -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index e021efc..a7d661d 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -75,11 +75,7 @@ BREAK_TO_DEBUGGER(); \ } while (0) -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) #include -#else -#include -#endif #define dm_alloc(size) kzalloc(size, GFP_KERNEL) #define dm_realloc(ptr, size) krealloc(ptr, size, GFP_KERNEL) diff --git a/drivers/gpu/drm/amd/lib/Makefile b/drivers/gpu/drm/amd/lib/Makefile index 87cd700..35a44c2 100644 --- a/drivers/gpu/drm/amd/lib/Makefile +++ b/drivers/gpu/drm/amd/lib/Makefile @@ -6,6 +6,16 @@ # driver components or later moved to kernel/lib for sharing with # other drivers. 
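For context on the KERNEL_VERSION guards added to amdgpu_dm_irq.h and amdgpu_dm_mst_types.h above: the backport convention is to compile the native DM declarations on sufficiently new kernels and fall back to a KCL compatibility header otherwise. A minimal sketch of that pattern follows, with a hypothetical kcl_dm_example.h standing in for the real shim headers:

    #include <linux/version.h>

    #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined(OS_NAME_RHEL_7_4)
    /* New enough kernel: declare against the in-tree DM implementation. */
    struct amdgpu_display_manager;
    void example_dm_init(struct amdgpu_display_manager *dm);
    #else
    /* Older kernel: pull in the KCL shim that re-implements the interface. */
    #include "../kcl_dm/kcl_dm_example.h"
    #endif /* KERNEL_VERSION */

The same version cutoff must be used in the header and in every .c file that provides the gated symbols; otherwise one side of the build sees declarations the other never compiles.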
+ifneq (,$(BUILD_AS_DKMS)) + CHASH_NAME = amdchash + $(CHASH_NAME)-y := chash.o +else + CHASH_NAME = chash +endif + ccflags-y := -I$(src)/../include -obj-$(CONFIG_CHASH) += chash.o +obj-$(CONFIG_CHASH) += $(CHASH_NAME).o + +LIB_FULL_PATH = $(src) +include $(LIB_FULL_PATH)/backport/Makefile diff --git a/drivers/gpu/drm/amd/lib/backport/Makefile b/drivers/gpu/drm/amd/lib/backport/Makefile new file mode 100644 index 0000000..e214316 --- /dev/null +++ b/drivers/gpu/drm/amd/lib/backport/Makefile @@ -0,0 +1,5 @@ +LINUXINCLUDE := $(DKMS_INCLUDE_PREFIX) $(LINUXINCLUDE) + +ccflags-y += \ + -I$(LIB_FULL_PATH) \ + -include backport/backport.h diff --git a/drivers/gpu/drm/amd/lib/backport/backport.h b/drivers/gpu/drm/amd/lib/backport/backport.h new file mode 100644 index 0000000..2c52212 --- /dev/null +++ b/drivers/gpu/drm/amd/lib/backport/backport.h @@ -0,0 +1,6 @@ +#ifndef LIB_BACKPORT_H +#define LIB_BACKPORT_H + +#include + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 9f51222..e1ff202 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1430,10 +1430,5 @@ int amd_powerplay_get_display_mode_validation_clocks(void *handle, mutex_unlock(&pp_handle->pp_lock); - if (ret) { - clocks->memory_max_clock = pp_dpm_get_mclk(handle, false); - clocks->engine_max_clock = pp_dpm_get_sclk(handle, false); - } - - return 0; + return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index a651ebc..d1af148 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -830,9 +830,9 @@ static int init_over_drive_limits( const ATOM_Tonga_POWERPLAYTABLE *powerplay_table) { hwmgr->platform_descriptor.overdriveLimit.engineClock = - le32_to_cpu(powerplay_table->ulMaxODEngineClock); + le16_to_cpu(powerplay_table->ulMaxODEngineClock); hwmgr->platform_descriptor.overdriveLimit.memoryClock = - le32_to_cpu(powerplay_table->ulMaxODMemoryClock); + le16_to_cpu(powerplay_table->ulMaxODMemoryClock); hwmgr->platform_descriptor.minOverdriveVDDC = 0; hwmgr->platform_descriptor.maxOverdriveVDDC = 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c index 4298362..5929b1e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c @@ -456,9 +456,6 @@ static int rv_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->backend = data; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_TablelessHardwareInterface); - result = rv_initialize_dpm_defaults(hwmgr); if (result != 0) { pr_err("rv_initialize_dpm_defaults failed\n"); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index b8a2fca..a301f64 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -1161,8 +1161,6 @@ static void vega10_setup_default_single_dpm_table(struct pp_hwmgr *hwmgr, { int i; - dpm_table->count = 0; - for (i = 0; i < dep_table->count; i++) { if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value <= dep_table->entries[i].clk) { @@ -1271,6 +1269,10 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); /* Initialize Sclk DPM table based on allow Sclk values */ + 
data->dpm_table.soc_table.count = 0; + data->dpm_table.gfx_table.count = 0; + data->dpm_table.dcef_table.count = 0; + dpm_table = &(data->dpm_table.soc_table); vega10_setup_default_single_dpm_table(hwmgr, dpm_table, @@ -2879,15 +2881,6 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "DPM is already running, skipping re-enablement!", return 0); - if ((data->smu_version == 0x001c2c00) || - (data->smu_version == 0x001c2d00)) { - tmp_result = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, - PPSMC_MSG_UpdatePkgPwrPidAlpha, 1); - PP_ASSERT_WITH_CODE(!tmp_result, - "Failed to set package power PID!", - return tmp_result); - } - tmp_result = vega10_construct_voltage_tables(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, "Failed to construct voltage tables!", diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h index a855072..5dbd54f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h @@ -130,7 +130,7 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_InitializeAcg 0x5F #define PPSMC_MSG_GetCurrPkgPwr 0x61 #define PPSMC_MSG_UpdatePkgPwrPidAlpha 0x68 -#define PPSMC_Message_Count 0x69 +#define PPSMC_Message_Count 0x62 typedef int PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 38cea6f..97c94f9 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -205,17 +205,32 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { struct amd_sched_rq *rq = entity->rq; + int r; if (!amd_sched_entity_is_initialized(sched, entity)) return; - /** * The client will not queue more IBs during this fini, consume existing - * queued IBs + * queued IBs or discard them on SIGKILL */ - wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - + if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) + r = -ERESTARTSYS; + else + r = wait_event_killable(sched->job_scheduled, + amd_sched_entity_is_idle(entity)); amd_sched_rq_remove_entity(rq, entity); + if (r) { + struct amd_sched_job *job; + + /* Park the kernel thread for a moment to make sure it isn't processing + * our entity.
+ */ + kthread_park(sched->thread); + kthread_unpark(sched->thread); + while (kfifo_out(&entity->job_queue, &job, sizeof(job))) + sched->ops->free_job(job); + + } kfifo_free(&entity->job_queue); } diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 1b82d73..df12fe6 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -446,6 +446,7 @@ void drm_vblank_cleanup(struct drm_device *dev) dev->num_crtcs = 0; } +EXPORT_SYMBOL(drm_vblank_cleanup); /** * drm_vblank_init - initialize vblank support diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index 4e883fd..decaba9 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -206,6 +206,17 @@ #define SDMA0_CNTL 0xD010 #define SDMA1_CNTL 0xD810 +#define SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE_MASK 0x1 +#define SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT 0x0 +#define SH_STATIC_MEM_CONFIG__ELEMENT_SIZE_MASK 0x6 +#define SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT 0x1 +#define SH_STATIC_MEM_CONFIG__INDEX_STRIDE_MASK 0x18 +#define SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT 0x3 +#define SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE_MASK 0xe0 +#define SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT 0x5 +#define SH_STATIC_MEM_CONFIG__READ_ONLY_CNTL_MASK 0xff00 +#define SH_STATIC_MEM_CONFIG__READ_ONLY_CNTL__SHIFT 0x8 + enum { MAX_TRAPID = 8, /* 3 bits in the bitfield. */ MAX_WATCH_ADDRESSES = 4 diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index e210154..8db8824 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -1300,6 +1300,11 @@ #define TC_CFG_L1_VOLATILE 0xAC88 #define TC_CFG_L2_VOLATILE 0xAC8C +#define COMPUTE_STATIC_THREAD_MGMT_SE0 0xB858 +#define COMPUTE_STATIC_THREAD_MGMT_SE1 0xB85C +#define COMPUTE_STATIC_THREAD_MGMT_SE2 0xB860 +#define COMPUTE_STATIC_THREAD_MGMT_SE3 0xB864 + #define CP_RB0_BASE 0xC100 #define CP_RB0_CNTL 0xC104 #define RB_BUFSZ(x) ((x) << 0) @@ -1555,6 +1560,7 @@ #define CP_HQD_HQ_SCHEDULER1 0xC998u #define SH_STATIC_MEM_CONFIG 0x9604u +#define SH_HIDDEN_PRIVATE_BASE_VMID 0x2580 #define DB_RENDER_CONTROL 0x28000 @@ -2171,4 +2177,8 @@ #define IH_VMID_0_LUT 0x3D40u +#define ATC_ATS_DEBUG 0x3328 +#define NUM_REQUESTS_AT_ERR(x) ((x) << 10) +#define NUM_REQUESTS_AT_ERR_MASK (0x1F << 10) + #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cbaeec..c0fea4a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1839,6 +1839,17 @@ struct radeon_asic_ring { void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp); }; +struct radeon_cu_info { + uint32_t number; /* total active CU number */ + uint32_t ao_cu_mask; + uint32_t simd_per_cu; + uint32_t max_waves_per_simd; + uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; + uint32_t bitmap[4][4]; +}; + /* * ASIC specific functions. 
*/ @@ -1861,6 +1872,7 @@ struct radeon_asic { uint64_t (*get_gpu_clock_counter)(struct radeon_device *rdev); /* get register for info ioctl */ int (*get_allowed_info_register)(struct radeon_device *rdev, u32 reg, u32 *val); + int (*get_cu_info)(struct radeon_device *rdev, struct radeon_cu_info *info); /* gart */ struct { void (*tlb_flush)(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index a2ab6dc..d670283 100755 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -42,11 +42,30 @@ static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { }; struct kgd_mem { - struct radeon_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; + union { + struct { + struct radeon_bo *bo; + uint64_t gpu_addr; + void *cpu_ptr; + } data1; + struct { + struct mutex lock; + struct radeon_bo *bo; + struct radeon_bo_va *bo_va; + bool mapped_to_gpu_memory; + } data2; + }; }; - +/* Helper functions*/ +static int add_bo_to_vm(struct radeon_device *rdev, uint64_t va, + struct radeon_vm *vm, struct radeon_bo *bo, + struct radeon_bo_va **bo_va); +static int map_bo_to_gpuvm(struct radeon_device *rdev, struct radeon_bo *bo, + struct radeon_bo_va *bo_va); +static int unmap_bo_from_gpuvm(struct radeon_device *rdev, + struct radeon_bo_va *bo_va); +static void remove_bo_from_vm(struct radeon_device *rdev, struct radeon_bo *bo, + struct radeon_bo_va *bo_va); static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -54,10 +73,28 @@ static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -static uint64_t get_vmem_size(struct kgd_dev *kgd); +static void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); + +static int create_process_vm(struct kgd_dev *kgd, void **vm, void **info); +static void destroy_process_vm(struct kgd_dev *kgd, void *vm); + +static uint32_t get_process_page_dir(void *vm); + +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem); +static int map_memory_to_gpu(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); +static int unmap_memory_from_gpu(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); +static int alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); +static int free_memory_of_gpu(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -75,12 +112,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); -static int kgd_hqd_destroy(struct kgd_dev 
*kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -103,13 +143,28 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); +static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req); +static void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +static int alloc_memory_of_scratch(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, + uint8_t element_size, uint8_t index_stride, uint8_t mtype); +static int mmap_bo(struct kgd_dev *kgd, struct vm_area_struct *vma); +static int map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .create_process_vm = create_process_vm, + .destroy_process_vm = destroy_process_vm, + .get_process_page_dir = get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_pipeline = kgd_init_pipeline, @@ -127,7 +182,18 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version + .alloc_memory_of_gpu = alloc_memory_of_gpu, + .free_memory_of_gpu = free_memory_of_gpu, + .map_memory_to_gpu = map_memory_to_gpu, + .unmap_memory_to_gpu = unmap_memory_from_gpu, + .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, + .get_cu_info = get_cu_info, + .alloc_memory_of_scratch = alloc_memory_of_scratch, + .write_config_static_mem = write_config_static_mem, + .mmap_bo = mmap_bo, + .map_gtt_bo_to_kernel = map_gtt_bo_to_kernel, + .set_vm_context_page_table_base = set_vm_context_page_table_base, }; static const struct kgd2kfd_calls *kgd2kfd; @@ -184,6 +250,8 @@ void radeon_kfd_device_init(struct radeon_device *rdev) if (rdev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, + + .gpuvm_size = (uint64_t)radeon_vm_size << 30, .num_pipe_per_mec = 4, .num_queue_per_pipe = 8 }; @@ -257,7 +325,8 @@ static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, - RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); + RADEON_GEM_GTT_WC, NULL, NULL, + &(*mem)->data1.bo); if (r) { dev_err(rdev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -265,38 +334,38 @@ static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, } /* map the buffer */ - r = radeon_bo_reserve((*mem)->bo, true); + r = radeon_bo_reserve((*mem)->data1.bo, true); if (r) { dev_err(rdev->dev, "(%d) failed to reserve bo for 
amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } - r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); + r = radeon_bo_pin((*mem)->data1.bo, RADEON_GEM_DOMAIN_GTT, + &(*mem)->data1.gpu_addr); if (r) { dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } - *gpu_addr = (*mem)->gpu_addr; + *gpu_addr = (*mem)->data1.gpu_addr; - r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + r = radeon_bo_kmap((*mem)->data1.bo, &(*mem)->data1.cpu_ptr); if (r) { dev_err(rdev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } - *cpu_ptr = (*mem)->cpu_ptr; + *cpu_ptr = (*mem)->data1.cpu_ptr; - radeon_bo_unreserve((*mem)->bo); + radeon_bo_unreserve((*mem)->data1.bo); return 0; allocate_mem_kmap_bo_failed: - radeon_bo_unpin((*mem)->bo); + radeon_bo_unpin((*mem)->data1.bo); allocate_mem_pin_bo_failed: - radeon_bo_unreserve((*mem)->bo); + radeon_bo_unreserve((*mem)->data1.bo); allocate_mem_reserve_bo_failed: - radeon_bo_unref(&(*mem)->bo); + radeon_bo_unref(&(*mem)->data1.bo); return r; } @@ -307,21 +376,28 @@ static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) BUG_ON(mem == NULL); - radeon_bo_reserve(mem->bo, true); - radeon_bo_kunmap(mem->bo); - radeon_bo_unpin(mem->bo); - radeon_bo_unreserve(mem->bo); - radeon_bo_unref(&(mem->bo)); + radeon_bo_reserve(mem->data1.bo, true); + radeon_bo_kunmap(mem->data1.bo); + radeon_bo_unpin(mem->data1.bo); + radeon_bo_unreserve(mem->data1.bo); + radeon_bo_unref(&(mem->data1.bo)); kfree(mem); } -static uint64_t get_vmem_size(struct kgd_dev *kgd) +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info) { struct radeon_device *rdev = (struct radeon_device *)kgd; BUG_ON(kgd == NULL); - return rdev->mc.real_vram_size; + memset(mem_info, 0, sizeof(*mem_info)); + mem_info->local_mem_size_public = rdev->mc.visible_vram_size; + mem_info->local_mem_size_private = + rdev->mc.real_vram_size - + rdev->mc.visible_vram_size; + mem_info->vram_width = rdev->mc.vram_width; + mem_info->mem_clk_max = radeon_dpm_get_mclk(rdev, false); } static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) @@ -339,6 +415,131 @@ static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; } +/* + * Creates a VM context for HSA process + */ +static int create_process_vm(struct kgd_dev *kgd, void **vm, void **info) +{ + int ret; + struct radeon_vm *new_vm; + struct radeon_device *rdev = (struct radeon_device *) kgd; + + BUG_ON(kgd == NULL); + BUG_ON(vm == NULL); + + new_vm = kzalloc(sizeof(struct radeon_vm), GFP_KERNEL); + if (new_vm == NULL) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = radeon_vm_init(rdev, new_vm); + if (ret != 0) { + /* Undo everything related to the new VM context */ + radeon_vm_fini(rdev, new_vm); + kfree(new_vm); + new_vm = NULL; + } + + *vm = (void *) new_vm; + + pr_debug("Created process vm with address %p\n", *vm); + + return ret; +} + +/* + * Destroys a VM context of HSA process + */ +static void destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + struct radeon_vm *rvm = (struct radeon_vm *) vm; + + BUG_ON(kgd == NULL); + BUG_ON(vm == NULL); + + pr_debug("Destroying process vm with address %p\n", vm); + + /* Release the VM context */ + radeon_vm_fini(rdev, rvm); + kfree(vm); +} + +static uint32_t get_process_page_dir(void *vm) +{ + struct radeon_vm 
*rvm = (struct radeon_vm *) vm; + struct radeon_vm_id *vm_id; + + BUG_ON(rvm == NULL); + + vm_id = &rvm->ids[CAYMAN_RING_TYPE_CP1_INDEX]; + + return vm_id->pd_gpu_addr >> RADEON_GPU_PAGE_SHIFT; +} + +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + int ret; + struct radeon_bo_va *bo_va; + struct radeon_bo *bo; + struct file *filp; + struct drm_gem_object *gem_obj; + + BUG_ON(kgd == NULL); + BUG_ON(mem == NULL); + BUG_ON(vm == NULL); + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) { + ret = -ENOMEM; + goto err; + } + mutex_init(&(*mem)->data2.lock); + + /* Translate fd to file */ + rcu_read_lock(); + filp = fcheck(fd); + rcu_read_unlock(); + + BUG_ON(filp == NULL); + + /* Get object by handle */ + gem_obj = drm_gem_object_lookup(filp->private_data, handle); + BUG_ON(gem_obj == NULL); + + /* No need to increment GEM refcount */ + drm_gem_object_unreference(gem_obj); + + bo = gem_to_radeon_bo(gem_obj); + + /* Inc TTM refcount */ + ttm_bo_reference(&bo->tbo); + + ret = add_bo_to_vm(rdev, va, vm, bo, &bo_va); + if (ret != 0) + goto err_map; + + /* The allocated BO, PD and appropriate PTs are pinned, virtual to MC address mapping created */ + ret = map_bo_to_gpuvm(rdev, bo, bo_va); + if (ret != 0) + goto err_failed_to_pin_bo; + + (*mem)->data2.bo = bo; + (*mem)->data2.bo_va = bo_va; + return 0; + +err_failed_to_pin_bo: + remove_bo_from_vm(rdev, bo, bo_va); +err_map: + radeon_bo_unref(&bo); + kfree(*mem); +err: + return ret; +} + static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd) { return (struct radeon_device *)kgd; } @@ -416,7 +617,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, * the SW cleared it. * So the protocol is to always wait & clear. */ - uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + uint32_t pasid_mapping = (pasid == 0) ?
0 : (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID_MASK; write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), @@ -446,7 +647,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; + mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; pipe = (pipe_id % CIK_PIPE_PER_MEC); lock_srbm(kgd, mec, pipe, 0, 0); @@ -482,7 +683,9 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) { uint32_t wptr_shadow, is_wptr_shadow_valid; struct cik_mqd *m; @@ -492,6 +695,16 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); acquire_queue(kgd, pipe_id, queue_id); + + write_register(kgd, COMPUTE_STATIC_THREAD_MGMT_SE0, + m->compute_static_thread_mgmt_se0); + write_register(kgd, COMPUTE_STATIC_THREAD_MGMT_SE1, + m->compute_static_thread_mgmt_se1); + write_register(kgd, COMPUTE_STATIC_THREAD_MGMT_SE2, + m->compute_static_thread_mgmt_se2); + write_register(kgd, COMPUTE_STATIC_THREAD_MGMT_SE3, + m->compute_static_thread_mgmt_se3); + write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); @@ -558,7 +771,8 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) { struct cik_sdma_rlc_registers *m; uint32_t sdma_base_addr; @@ -636,7 +850,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { @@ -713,7 +927,7 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) for (i = 0; i < MAX_WATCH_ADDRESSES; i++) write_register(kgd, watchRegs[i * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], + ADDRESS_WATCH_REG_CNTL], cntl.u32All); return 0; @@ -733,17 +947,17 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, cntl.bitfields.valid = 0; write_register(kgd, watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], + ADDRESS_WATCH_REG_CNTL], cntl.u32All); write_register(kgd, watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], + ADDRESS_WATCH_REG_ADDR_HI], addr_hi); write_register(kgd, watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], + ADDRESS_WATCH_REG_ADDR_LO], addr_lo); /* Enable the watch point */ @@ -751,7 +965,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, write_register(kgd, watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], + ADDRESS_WATCH_REG_CNTL], cntl.u32All); return 0; @@ -785,14 +999,15 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset) { - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; + return ( + (watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]) >> 2 + ); } static bool 
get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) { uint32_t reg; struct radeon_device *rdev = (struct radeon_device *) kgd; - reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; } @@ -802,7 +1017,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, { uint32_t reg; struct radeon_device *rdev = (struct radeon_device *) kgd; - reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; } @@ -810,10 +1024,539 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) { struct radeon_device *rdev = (struct radeon_device *) kgd; - return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); } +static int add_bo_to_vm(struct radeon_device *rdev, uint64_t va, + struct radeon_vm *rvm, struct radeon_bo *bo, + struct radeon_bo_va **bo_va) +{ + int ret; + + BUG_ON(va == 0); + + radeon_bo_reserve(bo, true); + + /* Add BO to VM internal data structures*/ + *bo_va = radeon_vm_bo_add(rdev, rvm, bo); + if (*bo_va == NULL) { + ret = -EINVAL; + pr_err("amdkfd: Failed to add BO object to VM. ret == %d\n", + ret); + goto err_vmadd; + } + + /* + * Set virtual address for the allocation, allocate PTs, if needed, + * and zero them + */ + ret = radeon_vm_bo_set_addr(rdev, *bo_va, va, + RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE); + if (ret != 0) { + pr_err("amdkfd: Failed to set virtual address for BO. ret == %d\n", + ret); + pr_debug("va == 0x%08llx\n", va); + goto err_vmsetaddr; + } + + return 0; + +err_vmsetaddr: + radeon_vm_bo_rmv(rdev, *bo_va); + mutex_lock(&rvm->mutex); + radeon_vm_clear_freed(rdev, rvm); + mutex_unlock(&rvm->mutex); + /* Don't fall through to unreserve because the BO was already + unreserved by radeon_vm_bo_set_addr. 
*/ + return ret; +err_vmadd: + radeon_bo_unreserve(bo); + return ret; +} + +static void remove_bo_from_vm(struct radeon_device *rdev, struct radeon_bo *bo, + struct radeon_bo_va *bo_va) +{ + radeon_bo_reserve(bo, true); + radeon_vm_bo_rmv(rdev, bo_va); + radeon_bo_unreserve(bo); +} + +static int try_pin_bo(struct radeon_bo *bo, uint64_t *mc_address, bool resv) +{ + int ret; + + if (resv) { + ret = radeon_bo_reserve(bo, true); + if (ret != 0) + return ret; + } + + ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, mc_address); + if (ret != 0) { + if (resv) + radeon_bo_unreserve(bo); + return ret; + } + + if (resv) + radeon_bo_unreserve(bo); + + return 0; +} + +static int unpin_bo(struct radeon_bo *bo, bool resv) +{ + int ret; + + if (resv) { + ret = radeon_bo_reserve(bo, true); + if (ret != 0) + return ret; + } + + ret = radeon_bo_unpin(bo); + if (ret != 0) { + if (resv) + radeon_bo_unreserve(bo); + return ret; + } + + if (resv) + radeon_bo_unreserve(bo); + + return 0; +} + +static int try_pin_pts(struct radeon_bo_va *bo_va, bool resv) +{ + int ret; + uint64_t pt_idx, start, last, failed; + struct radeon_vm *vm; + + vm = bo_va->vm; + start = bo_va->it.start >> radeon_vm_block_size; + last = bo_va->it.last >> radeon_vm_block_size; + + pr_debug("start PT index %llu last PT index %llu\n", start, last); + + /* walk over the address space and pin the page table BOs */ + for (pt_idx = start; pt_idx <= last; pt_idx++) { + ret = try_pin_bo(vm->page_tables[pt_idx].bo, NULL, resv); + if (ret != 0) { + failed = pt_idx; + goto err; + } + } + + return 0; + +err: + /* Unpin all already-pinned BOs */ + if (failed > 0) { + for (pt_idx = start; pt_idx <= failed - 1; pt_idx++) + unpin_bo(vm->page_tables[pt_idx].bo, resv); + } + return ret; +} + +static void unpin_pts(struct radeon_bo_va *bo_va, struct radeon_vm *vm, + bool resv) +{ + uint64_t pt_idx, start, last; + + start = bo_va->it.start >> radeon_vm_block_size; + last = bo_va->it.last >> radeon_vm_block_size; + + pr_debug("start PT index %llu last PT index %llu\n", start, last); + + /* walk over the address space and unpin the page table BOs */ + for (pt_idx = start; pt_idx <= last; pt_idx++) + unpin_bo(vm->page_tables[pt_idx].bo, resv); +} + +static int map_bo_to_gpuvm(struct radeon_device *rdev, struct radeon_bo *bo, + struct radeon_bo_va *bo_va) +{ + struct radeon_vm_id *vm_id; + struct radeon_vm *vm; + int ret; + struct radeon_bo_list *vm_bos, *lobj; + struct ww_acquire_ctx ticket; + struct list_head list; + + INIT_LIST_HEAD(&list); + + vm = bo_va->vm; + + /* Pin the BO */ + ret = try_pin_bo(bo, NULL, true); + if (ret != 0) { + pr_err("amdkfd: Failed to pin BO\n"); + return ret; + } + + vm_bos = radeon_vm_get_bos(rdev, vm, &list); + if (!vm_bos) { + pr_err("amdkfd: Failed to get bos from vm\n"); + ret = -ENOMEM; + goto err_failed_to_get_bos; + } + + ret = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + if (ret) { + pr_err("amdkfd: Failed to reserve buffers in ttm\n"); + goto err_failed_to_ttm_reserve; + } + + /* Pin PTs */ + ret = try_pin_pts(bo_va, false); + if (ret != 0) { + pr_err("amdkfd: Failed to pin PTs\n"); + goto err_failed_to_pin_pts; + } + + /* Pin the page directory */ + vm_id = &vm->ids[CAYMAN_RING_TYPE_CP1_INDEX]; + ret = try_pin_bo(vm->page_directory, &vm_id->pd_gpu_addr, false); + if (ret != 0) { + pr_err("amdkfd: Failed to pin PD\n"); + goto err_failed_to_pin_pd; + } + + mutex_lock(&vm->mutex); + + /* Update the page directory */ + ret = radeon_vm_update_page_directory(rdev, vm); + if (ret != 0) { + pr_err("amdkfd: Failed to
radeon_vm_update_page_directory\n"); + goto err_failed_to_update_pd; + } + + /* + * The previously "released" BOs are really released and their VAs are + * removed from PT. This function is called here because it requires + * the radeon_vm::mutex to be locked and PT to be reserved + */ + ret = radeon_vm_clear_freed(rdev, vm); + if (ret != 0) { + pr_err("amdkfd: Failed to radeon_vm_clear_freed\n"); + goto err_failed_vm_clear_freed; + } + + /* Update the page tables */ + ret = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem); + if (ret != 0) { + pr_err("amdkfd: Failed to radeon_vm_bo_update\n"); + goto err_failed_to_update_pts; + } + + ret = radeon_vm_clear_invalids(rdev, vm); + if (ret != 0) { + pr_err("amdkfd: Failed to radeon_vm_clear_invalids\n"); + goto err_failed_to_vm_clear_invalids; + } + + mutex_unlock(&vm->mutex); + + list_for_each_entry(lobj, &list, tv.head) { + struct radeon_bo *bo = lobj->robj; + ret = ttm_bo_wait(&bo->tbo, false, false); + if (ret != 0) { + pr_err("amdkfd: Failed to wait for PT/PD update (err == %d)\n", + ret); + goto err_failed_to_wait_pt_pd_update; + } + } + + ttm_eu_backoff_reservation(&ticket, &list); + kvfree(vm_bos); + + return 0; + +err_failed_to_wait_pt_pd_update: + mutex_lock(&vm->mutex); +err_failed_to_vm_clear_invalids: + radeon_vm_bo_update(rdev, bo_va, NULL); +err_failed_to_update_pts: +err_failed_vm_clear_freed: +err_failed_to_update_pd: + mutex_unlock(&vm->mutex); + unpin_bo(vm->page_directory, false); +err_failed_to_pin_pd: + unpin_pts(bo_va, vm, false); +err_failed_to_pin_pts: + ttm_eu_backoff_reservation(&ticket, &list); +err_failed_to_ttm_reserve: + kvfree(vm_bos); +err_failed_to_get_bos: + unpin_bo(bo, true); + + return ret; +} + +static int unmap_bo_from_gpuvm(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) +{ + struct radeon_vm *vm; + int ret; + struct ttm_validate_buffer tv; + struct radeon_bo_list *vm_bos; + struct ww_acquire_ctx ticket; + struct list_head list; + + INIT_LIST_HEAD(&list); + + vm = bo_va->vm; + tv.bo = &bo_va->bo->tbo; + tv.shared = true; + list_add(&tv.head, &list); + + vm_bos = radeon_vm_get_bos(rdev, vm, &list); + if (!vm_bos) { + pr_err("amdkfd: Failed to get bos from vm\n"); + ret = -ENOMEM; + goto err_failed_to_get_bos; + } + + ret = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + if (ret) { + pr_err("amdkfd: Failed to reserve buffers in ttm\n"); + goto err_failed_to_ttm_reserve; + } + + mutex_lock(&vm->mutex); + + /* + * The previously "released" BOs are really released and their VAs are + * removed from PT. 
This function is called here because it requires + * the radeon_vm::mutex to be locked and PT to be reserved + */ + radeon_vm_clear_freed(rdev, vm); + + /* Update the page tables - Remove the mapping from bo_va */ + radeon_vm_bo_update(rdev, bo_va, NULL); + + radeon_vm_clear_invalids(rdev, vm); + + mutex_unlock(&vm->mutex); + + ttm_eu_backoff_reservation(&ticket, &list); + kvfree(vm_bos); + + return 0; + +err_failed_to_ttm_reserve: + kvfree(vm_bos); +err_failed_to_get_bos: + return ret; +} + +static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, + uint8_t element_size, uint8_t index_stride, uint8_t mtype) +{ + uint32_t reg; + struct radeon_device *rdev = (struct radeon_device *) kgd; + + reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT | + element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT | + index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT | + mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT; + + WREG32(SH_STATIC_MEM_CONFIG, reg); + return 0; +} + +static int alloc_memory_of_scratch(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + + lock_srbm(kgd, 0, 0, 0, vmid); + WREG32(SH_HIDDEN_PRIVATE_BASE_VMID, va); + unlock_srbm(kgd); + + return 0; +} + +static int alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + int ret; + struct radeon_bo_va *bo_va; + struct radeon_bo *bo; + + BUG_ON(kgd == NULL); + BUG_ON(size == 0); + BUG_ON(mem == NULL); + BUG_ON(vm == NULL); + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (*mem == NULL) { + ret = -ENOMEM; + goto err; + } + mutex_init(&(*mem)->data2.lock); + + /* Allocate buffer object on VRAM */ + ret = radeon_bo_create(rdev, size, PAGE_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_NO_CPU_ACCESS, NULL, NULL, &bo); + if (ret != 0) { + pr_err("amdkfd: Failed to create BO object on VRAM. ret == %d\n", + ret); + goto err_bo_create; + } + + pr_debug("Created BO on VRAM with size %llu bytes\n", size); + ret = add_bo_to_vm(rdev, va, vm, bo, &bo_va); + if (ret != 0) + goto err_map; + + pr_debug("Set BO to VA %p\n", (void *) va); + + (*mem)->data2.bo = bo; + (*mem)->data2.bo_va = bo_va; + (*mem)->data2.mapped_to_gpu_memory = 0; + + return 0; + +err_map: + radeon_bo_unref(&bo); +err_bo_create: + kfree(*mem); +err: + return ret; +} + +static int free_memory_of_gpu(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + + BUG_ON(kgd == NULL); + BUG_ON(mem == NULL); + + mutex_lock(&mem->data2.lock); + + if (mem->data2.mapped_to_gpu_memory == 1) { + pr_debug("BO with VA %p, size %lu bytes is mapped to GPU.
Need to unmap it before release\n", + (void *) (mem->data2.bo_va->it.start * RADEON_GPU_PAGE_SIZE), + mem->data2.bo->tbo.mem.size); + mutex_unlock(&mem->data2.lock); + unmap_memory_from_gpu(kgd, mem, NULL); + } else + mutex_unlock(&mem->data2.lock); + /* lock is not needed after this, since mem is unused and will + * be freed anyway */ + + pr_debug("Releasing BO with VA %p, size %lu bytes\n", + (void *) (mem->data2.bo_va->it.start * RADEON_GPU_PAGE_SIZE), + mem->data2.bo->tbo.mem.size); + + /* Remove from VM internal data structures */ + remove_bo_from_vm(rdev, mem->data2.bo, mem->data2.bo_va); + + /* Free the BO */ + radeon_bo_unref(&mem->data2.bo); + kfree(mem); + + return 0; +} + +static int map_memory_to_gpu(struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + int ret; + struct radeon_bo_va *bo_va; + struct radeon_bo *bo; + + BUG_ON(kgd == NULL); + BUG_ON(mem == NULL); + + mutex_lock(&mem->data2.lock); + + bo = mem->data2.bo; + bo_va = mem->data2.bo_va; + + if (mem->data2.mapped_to_gpu_memory == 1) { + pr_debug("BO with VA %p, size %lu bytes already mapped to GPU memory\n", + (void *) (mem->data2.bo_va->it.start * RADEON_GPU_PAGE_SIZE), + mem->data2.bo->tbo.mem.size); + mutex_unlock(&mem->data2.lock); + return 0; + } + + pr_debug("Mapping BO with VA %p, size %lu bytes to GPU memory\n", + (void *) (mem->data2.bo_va->it.start * RADEON_GPU_PAGE_SIZE), + mem->data2.bo->tbo.mem.size); + + /* + * We need to pin the allocated BO, PD and appropriate PTs and to + * create a mapping of virtual to MC address + */ + ret = map_bo_to_gpuvm(rdev, bo, bo_va); + if (ret != 0) { + pr_err("amdkfd: Failed to map radeon bo to gpuvm\n"); + mutex_unlock(&mem->data2.lock); + return ret; + } + + mem->data2.mapped_to_gpu_memory = 1; + + mutex_unlock(&mem->data2.lock); + + return ret; +} + +static int unmap_memory_from_gpu(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + struct radeon_bo_va *bo_va; + int ret = 0; + + BUG_ON(kgd == NULL); + BUG_ON(mem == NULL); + + mutex_lock(&mem->data2.lock); + + if (mem->data2.mapped_to_gpu_memory == 0) { + pr_debug("Unmapping BO with VA %p, size %lu bytes from GPU memory is unnecessary\n", + (void *) (mem->data2.bo_va->it.start * RADEON_GPU_PAGE_SIZE), + mem->data2.bo->tbo.mem.size); + mutex_unlock(&mem->data2.lock); + return 0; + } + + pr_debug("Unmapping BO with VA %p, size %lu bytes from GPU memory\n", + (void *) (mem->data2.bo_va->it.start * RADEON_GPU_PAGE_SIZE), + mem->data2.bo->tbo.mem.size); + + bo_va = mem->data2.bo_va; + + /* Unpin the page directory */ + unpin_bo(bo_va->vm->page_directory, true); + + /* Unpin PTs */ + unpin_pts(bo_va, bo_va->vm, true); + + /* Unpin the BO */ + unpin_bo(mem->data2.bo, true); + + ret = unmap_bo_from_gpuvm(rdev, bo_va); + + mem->data2.mapped_to_gpu_memory = 0; + + mutex_unlock(&mem->data2.lock); + + return ret; +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct radeon_device *rdev = (struct radeon_device *) kgd; @@ -848,7 +1591,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) break; case KGD_ENGINE_SDMA1: - case KGD_ENGINE_SDMA2: hdr = (const union radeon_firmware_header *) rdev->sdma_fw->data; break; @@ -863,3 +1605,65 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) /* Only 12 bits in use */ return hdr->common.ucode_version; } + +static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req) +{
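+ /* Read-modify-write: update only the NUM_REQUESTS_AT_ERR field of + * ATC_ATS_DEBUG and preserve the register's remaining debug bits. + */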
+ uint32_t value; + + value = read_register(dev, ATC_ATS_DEBUG); + value &= ~NUM_REQUESTS_AT_ERR_MASK; + value |= NUM_REQUESTS_AT_ERR(num_of_req); + + write_register(dev, ATC_ATS_DEBUG, value); +} + +static void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + struct radeon_cu_info rcu_info; + + memset(cu_info, 0, sizeof(*cu_info)); + if (sizeof(cu_info->cu_bitmap) != sizeof(rcu_info.bitmap)) + return; + if (rdev->asic->get_cu_info == NULL) + return; + + memset(&rcu_info, 0, sizeof(rcu_info)); + rdev->asic->get_cu_info(rdev, &rcu_info); + cu_info->cu_active_number = rcu_info.number; + cu_info->cu_ao_mask = rcu_info.ao_cu_mask; + memcpy(&cu_info->cu_bitmap[0], &rcu_info.bitmap[0], sizeof(rcu_info.bitmap)); + cu_info->num_shader_engines = rdev->config.cik.max_shader_engines; + cu_info->num_shader_arrays_per_engine = rdev->config.cik.max_sh_per_se; + cu_info->num_cu_per_sh = rdev->config.cik.max_cu_per_sh; + cu_info->simd_per_cu = rcu_info.simd_per_cu; + cu_info->max_waves_per_simd = rcu_info.max_waves_per_simd; + cu_info->wave_front_size = rcu_info.wave_front_size; + cu_info->max_scratch_slots_per_cu = rcu_info.max_scratch_slots_per_cu; + cu_info->lds_size = rcu_info.lds_size; +} + +static int mmap_bo(struct kgd_dev *kgd, struct vm_area_struct *vma) +{ + return 0; +} + +static int map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr) +{ + return 0; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + + if (vmid < 8 || vmid > 15) { + pr_err("amdkfd: trying to set page table base for wrong VMID\n"); + return; + } + WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + + diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 33fe959..76b0914 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -84,6 +84,37 @@ struct drm_fb_helper_surface_size { * Driver callbacks used by the fbdev emulation helper library. */ struct drm_fb_helper_funcs { + /** + * @gamma_set: + * + * Set the given gamma LUT register on the given CRTC. + * + * This callback is optional. + * + * FIXME: + * + * This callback is functionally redundant with the core gamma table + * support and simply exists because the fbdev hasn't yet been + * refactored to use the core gamma table interfaces. + */ + void (*gamma_set)(struct drm_crtc *crtc, u16 red, u16 green, + u16 blue, int regno); + /** + * @gamma_get: + * + * Read the given gamma LUT register on the given CRTC, used to save the + * current LUT when force-restoring the fbdev for e.g. kdbg. + * + * This callback is optional. + * + * FIXME: + * + * This callback is functionally redundant with the core gamma table + * support and simply exists because the fbdev hasn't yet been + * refactored to use the core gamma table interfaces. 
+ */ + void (*gamma_get)(struct drm_crtc *crtc, u16 *red, u16 *green, + u16 *blue, int regno); /** * @fb_probe: * diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index d0d1f2a..67a6e2e 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -168,6 +168,7 @@ void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); void drm_crtc_vblank_on(struct drm_crtc *crtc); +void drm_vblank_cleanup(struct drm_device *dev); u32 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc); u32 drm_accurate_vblank_count(struct drm_crtc *crtc); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 36a4c1a..69f72d1 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -833,6 +833,8 @@ struct drm_amdgpu_info_firmware { #define AMDGPU_VRAM_TYPE_HBM 6 #define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_HBM_WIDTH 4096 + struct drm_amdgpu_info_device { /** PCI Device ID */ __u32 device_id; -- 2.7.4
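
For reviewers of the gpu_scheduler.c hunk above, the teardown pattern it introduces is: wait killably for the entity to drain, and if the wait aborts (or the task was already SIGKILLed), park the scheduler thread so it cannot touch the entity while the leftover jobs are freed. Below is a self-contained sketch of that pattern using simplified, hypothetical types (sched_entity, entity_fini, drop_job), not the driver's real structures:

    #include <linux/kfifo.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/wait.h>

    struct sched_entity {
            struct kfifo job_queue;            /* holds struct job * entries */
            wait_queue_head_t *job_scheduled;  /* woken by the scheduler thread */
            struct task_struct *worker;        /* the scheduler kthread */
    };

    static bool entity_is_idle(struct sched_entity *e)
    {
            return kfifo_is_empty(&e->job_queue);
    }

    static void entity_fini(struct sched_entity *e, void (*drop_job)(void *job))
    {
            void *job;
            int r;

            /* A task killed by SIGKILL must not block here; treat the wait
             * as already interrupted and fall through to the cleanup path.
             */
            if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
                    r = -ERESTARTSYS;
            else
                    r = wait_event_killable(*e->job_scheduled, entity_is_idle(e));

            if (r) {
                    /* Park and unpark the worker so it is guaranteed not to
                     * be processing this entity while the queue is drained.
                     */
                    kthread_park(e->worker);
                    kthread_unpark(e->worker);
                    while (kfifo_out(&e->job_queue, &job, sizeof(job)))
                            drop_job(job);
            }
            kfifo_free(&e->job_queue);
    }

The park/unpark pair is the cheap way to get a barrier against the scheduler thread: once kthread_park() returns, the worker is known to be idle, so draining the kfifo afterwards cannot race with job dispatch.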