aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux
diff options
context:
space:
mode:
authorSanjay R Mehta <sanju.mehta@amd.com>2016-07-20 14:55:51 +0530
committerAwais Belal <awais_belal@mentor.com>2016-07-20 17:57:16 +0500
commite38f9083a8b0abb953ad9b7197409c9f76626747 (patch)
tree26f5c35268a72e70f2d74ec374c95e9fcee9e7dc /common/recipes-kernel/linux
parent36fe64952ef4ba0f3c6d9ab5fb8d6f8b245e09b2 (diff)
downloadmeta-amd-e38f9083a8b0abb953ad9b7197409c9f76626747.tar.gz
meta-amd-e38f9083a8b0abb953ad9b7197409c9f76626747.tar.bz2
meta-amd-e38f9083a8b0abb953ad9b7197409c9f76626747.zip
Add power gating initialization support for GFX8.0
Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
Diffstat (limited to 'common/recipes-kernel/linux')
-rw-r--r--common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch9869
-rw-r--r--common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc1
2 files changed, 9870 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch b/common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch
new file mode 100644
index 00000000..be4fb9fa
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch
@@ -0,0 +1,9869 @@
+From c712d1b59efd1e29dd032871a3a34cbfd0c7af75 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Wed, 20 Jul 2016 14:20:32 +0530
+Subject: [PATCH] Add power gating initialization support for GFX8.0
+
+ - Enable GFX PG on CZ.
+ Tested with xonotic-glx/glxgears/supertuxkart and idle desktop.
+ Also read-back registers via umr for verification that the bits
+ are truly enabled.
+ - Enable CG for UVD6 on Carrizo
+ - Use dal driver for CZ
+ - Enable gmc clockgating for CZ
+ - Enable gfx clockgating for CZ
+ - Fetch the values from atom rather than hardcoding them in the
+ driver.
+ - Fetch cu_info once at init and just store the results
+ for future requests.
+ - fix memory leak in CGS (FW info). A previously requested FW
+ pointer should not be overwritten.
+ - add query GFX cu info in CGS query system info.
+ Needed for per CU powergating.
+ - add a cgs interface to notify amdgpu of the dpm state.
+ - fix segmentation fault issue in multi-display case.
+ - keep vm in job instead of ib.
+ ib.vm is a legacy way to get the vm. Now that the scheduler is
+ implemented, the vm should be obtained from the job, and all ibs
+ from one job share the same vm, so there is no need to keep ib.vm;
+ just move the vm field to the job.
+ This patch also adds the job as a parameter to ib_schedule
+ so it can get the vm from job->vm.
+ - remove sorting of CS BOs. Not needed any more.
+ - create fence slab once when amdgpu module init.
+ v2: add functions for init/exit instead of moving the
+ variables into the driver.
+ - fence wait old rcu slot.
+ The number of rcu slots was initialized to num_hw_submission, but
+ if command submission doesn't use the scheduler (as in the uvd test),
+ that limit is not enforced.
+ - v2: recreate from scratch, avoid all unnecessary changes.
+ - Should always flush & invalidate hdp no matter vm used or not.
+ - using the ctx pointer is not safe, because it is likely to have
+ already been assigned to another ctx when doing the comparison.
+ fence_context is always increasing and has only a rare chance
+ of wrapping around to an in-use number for jobs that are scheduled
+ to the ring continuously
+ - use a sync object for VMID fences. This way we can store more than
+ one fence as user for each VMID.
+ - forbid mapping of userptr bo through radeon device file.
+ Allowing userptr bo which are basically a list of pages from some vma
+ (so either anonymous page or file backed page) would lead to serious
+ corruption of kernel structures and counters (because we overwrite
+ the page->mapping field when mapping buffer).
+ This will already block if the buffer was populated before anyone does
+ try to mmap it because then TTM_PAGE_FLAG_SG would be set in the
+ ttm_tt flags. But that flag is checked before ttm_tt_populate in the ttm
+ vm fault handler.
+ So to be safe just add a check to verify_access() callback.
+ - group BOs by log2 of the size on the LRU v2.
+ This allows us to have small BOs on the LRU before big ones.
+ - implement LRU add callbacks v2.
+ This allows fine grained control for the driver where to add a BO into the LRU.
+ - Mark all instances of struct drm_info_list as const.
+ All these are compile time constants and the
+ drm_debugfs_create/remove_files functions take a const
+ pointer argument.
+ - Don't move pinned BOs. The purpose of pinning is to prevent a buffer from moving.
+ - when suspending, if uvd/vce was running, the delayed work needs to be cancelled.
+ This fixes the issue that after resume, uvd/vce dpm was disabled
+ and uvd/vce's performance dropped.
+
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/Kconfig | 10 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 197 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 30 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 176 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 127 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 296 ++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 43 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 39 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 53 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 36 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 87 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 30 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 32 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 7 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 160 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 105 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 29 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 145 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 24 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 3 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 450 ++---
+ drivers/gpu/drm/amd/amdgpu/atom.h | 2 +-
+ drivers/gpu/drm/amd/amdgpu/atombios_crtc.c | 98 +-
+ drivers/gpu/drm/amd/amdgpu/atombios_crtc.h | 2 +
+ drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 97 +-
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1946 ++++++++++++++++----
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | 1 -
+ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 182 +-
+ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 9 +-
+ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 223 +--
+ drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h | 1 +
+ drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 3 +-
+ drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 3 +-
+ drivers/gpu/drm/amd/amdgpu/vi.c | 282 ++-
+ drivers/gpu/drm/amd/include/amd_shared.h | 7 +
+ .../gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h | 2 +
+ drivers/gpu/drm/amd/include/atombios.h | 663 ++++++-
+ drivers/gpu/drm/amd/include/cgs_common.h | 84 +-
+ 42 files changed, 4256 insertions(+), 1456 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
+index 27f2000..3e29c86 100644
+--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
++++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
+@@ -16,7 +16,15 @@ config DRM_AMDGPU_USERPTR
+ This option selects CONFIG_MMU_NOTIFIER if it isn't already
+ selected to enabled full userptr support.
+
++config DRM_AMDGPU_GART_DEBUGFS
++ bool "Allow GART access through debugfs"
++ depends on DRM_AMDGPU
++ depends on DEBUG_FS
++ default n
++ help
++ Selecting this option creates a debugfs file to inspect the mapped
++ pages. Uses more memory for housekeeping, enable only for debugging.
++
+ source "drivers/gpu/drm/amd/powerplay/Kconfig"
+ source "drivers/gpu/drm/amd/acp/Kconfig"
+ source "drivers/gpu/drm/amd/dal/Kconfig"
+-
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 80d5cef..0873301 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -285,7 +285,8 @@ struct amdgpu_ring_funcs {
+ int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+ /* command emit functions */
+ void (*emit_ib)(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib);
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch);
+ void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned flags);
+ void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
+@@ -369,13 +370,6 @@ struct amdgpu_fence_driver {
+ #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
+ #define AMDGPU_FENCE_FLAG_INT (1 << 1)
+
+-struct amdgpu_user_fence {
+- /* write-back bo */
+- struct amdgpu_bo *bo;
+- /* write-back address offset to bo start */
+- uint32_t offset;
+-};
+-
+ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
+ void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
+ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
+@@ -395,6 +389,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+ /*
+ * TTM.
+ */
++
++#define AMDGPU_TTM_LRU_SIZE 20
++
++struct amdgpu_mman_lru {
++ struct list_head *lru[TTM_NUM_MEM_TYPES];
++ struct list_head *swap_lru;
++};
++
+ struct amdgpu_mman {
+ struct ttm_bo_global_ref bo_global_ref;
+ struct drm_global_reference mem_global_ref;
+@@ -412,6 +414,9 @@ struct amdgpu_mman {
+ struct amdgpu_ring *buffer_funcs_ring;
+ /* Scheduler entity for buffer moves */
+ struct amd_sched_entity entity;
++
++ /* custom LRU management */
++ struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
+ };
+
+ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+@@ -440,7 +445,6 @@ struct amdgpu_bo_va_mapping {
+
+ /* bo virtual addresses in a specific vm */
+ struct amdgpu_bo_va {
+- struct mutex mutex;
+ /* protected by bo being reserved */
+ struct list_head bo_list;
+ struct fence *last_pt_update;
+@@ -499,9 +503,10 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv);
+ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
+ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+-struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+- struct dma_buf_attachment *attach,
+- struct sg_table *sg);
++struct drm_gem_object *
++amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
++ struct dma_buf_attachment *attach,
++ struct sg_table *sg);
+ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *gobj,
+ int flags);
+@@ -591,11 +596,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
+ struct amdgpu_sync *sync,
+ struct reservation_object *resv,
+ void *owner);
++bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
++int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
++ struct fence *fence);
+ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+ int amdgpu_sync_wait(struct amdgpu_sync *sync);
+ void amdgpu_sync_free(struct amdgpu_sync *sync);
+ int amdgpu_sync_init(void);
+ void amdgpu_sync_fini(void);
++int amdgpu_fence_slab_init(void);
++void amdgpu_fence_slab_fini(void);
+
+ /*
+ * GART structures, functions & helpers
+@@ -614,8 +624,9 @@ struct amdgpu_gart {
+ unsigned num_gpu_pages;
+ unsigned num_cpu_pages;
+ unsigned table_size;
++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+ struct page **pages;
+- dma_addr_t *pages_addr;
++#endif
+ bool ready;
+ const struct amdgpu_gart_funcs *gart_funcs;
+ };
+@@ -714,6 +725,7 @@ struct amdgpu_flip_work {
+ unsigned shared_count;
+ struct fence **shared;
+ struct fence_cb cb;
++ bool async;
+ };
+
+
+@@ -726,17 +738,7 @@ struct amdgpu_ib {
+ uint32_t length_dw;
+ uint64_t gpu_addr;
+ uint32_t *ptr;
+- struct amdgpu_user_fence *user;
+- struct amdgpu_vm *vm;
+- unsigned vm_id;
+- uint64_t vm_pd_addr;
+- struct amdgpu_ctx *ctx;
+- uint32_t gds_base, gds_size;
+- uint32_t gws_base, gws_size;
+- uint32_t oa_base, oa_size;
+ uint32_t flags;
+- /* resulting sequence number */
+- uint64_t sequence;
+ };
+
+ enum amdgpu_ring_type {
+@@ -750,7 +752,7 @@ enum amdgpu_ring_type {
+ extern struct amd_sched_backend_ops amdgpu_sched_ops;
+
+ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+- struct amdgpu_job **job);
++ struct amdgpu_job **job, struct amdgpu_vm *vm);
+ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+ struct amdgpu_job **job);
+
+@@ -765,7 +767,7 @@ struct amdgpu_ring {
+ struct amdgpu_device *adev;
+ const struct amdgpu_ring_funcs *funcs;
+ struct amdgpu_fence_driver fence_drv;
+- struct amd_gpu_scheduler sched;
++ struct amd_gpu_scheduler sched;
+
+ spinlock_t fence_lock;
+ struct amdgpu_bo *ring_obj;
+@@ -793,7 +795,7 @@ struct amdgpu_ring {
+ unsigned wptr_offs;
+ unsigned next_rptr_offs;
+ unsigned fence_offs;
+- struct amdgpu_ctx *current_ctx;
++ uint64_t current_ctx;
+ enum amdgpu_ring_type type;
+ char name[16];
+ unsigned cond_exe_offs;
+@@ -841,16 +843,8 @@ struct amdgpu_vm_pt {
+ uint64_t addr;
+ };
+
+-struct amdgpu_vm_id {
+- struct amdgpu_vm_manager_id *mgr_id;
+- uint64_t pd_gpu_addr;
+- /* last flushed PD/PT update */
+- struct fence *flushed_updates;
+-};
+-
+ struct amdgpu_vm {
+ /* tree of virtual addresses mapped */
+- spinlock_t it_lock;
+ struct rb_root va;
+
+ /* protecting invalidated */
+@@ -874,19 +868,29 @@ struct amdgpu_vm {
+ struct amdgpu_vm_pt *page_tables;
+
+ /* for id and flush management per ring */
+- struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
++ struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS];
+
+ /* protecting freed */
+ spinlock_t freed_lock;
+
+ /* Scheduler entity for page table updates */
+ struct amd_sched_entity entity;
++
++ /* client id */
++ u64 client_id;
+ };
+
+-struct amdgpu_vm_manager_id {
++struct amdgpu_vm_id {
+ struct list_head list;
+- struct fence *active;
+- atomic_long_t owner;
++ struct fence *first;
++ struct amdgpu_sync active;
++ struct fence *last_flush;
++ struct amdgpu_ring *last_user;
++ atomic64_t owner;
++
++ uint64_t pd_gpu_addr;
++ /* last flushed PD/PT update */
++ struct fence *flushed_updates;
+
+ uint32_t gds_base;
+ uint32_t gds_size;
+@@ -901,7 +905,7 @@ struct amdgpu_vm_manager {
+ struct mutex lock;
+ unsigned num_ids;
+ struct list_head ids_lru;
+- struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM];
++ struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
+
+ uint32_t max_pfn;
+ /* vram base address for page table entry */
+@@ -913,6 +917,8 @@ struct amdgpu_vm_manager {
+ struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_rings;
+ atomic_t vm_pte_next_ring;
++ /* client id counter */
++ atomic64_t client_counter;
+ };
+
+ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
+@@ -928,11 +934,11 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
+ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync, struct fence *fence,
+ unsigned *vm_id, uint64_t *vm_pd_addr);
+-void amdgpu_vm_flush(struct amdgpu_ring *ring,
+- unsigned vm_id, uint64_t pd_addr,
+- uint32_t gds_base, uint32_t gds_size,
+- uint32_t gws_base, uint32_t gws_size,
+- uint32_t oa_base, uint32_t oa_size);
++int amdgpu_vm_flush(struct amdgpu_ring *ring,
++ unsigned vm_id, uint64_t pd_addr,
++ uint32_t gds_base, uint32_t gds_size,
++ uint32_t gws_base, uint32_t gws_size,
++ uint32_t oa_base, uint32_t oa_size);
+ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
+ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+@@ -1038,6 +1044,11 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
+ */
+ #include "clearstate_defs.h"
+
++struct amdgpu_rlc_funcs {
++ void (*enter_safe_mode)(struct amdgpu_device *adev);
++ void (*exit_safe_mode)(struct amdgpu_device *adev);
++};
++
+ struct amdgpu_rlc {
+ /* for power gating */
+ struct amdgpu_bo *save_restore_obj;
+@@ -1056,6 +1067,24 @@ struct amdgpu_rlc {
+ uint64_t cp_table_gpu_addr;
+ volatile uint32_t *cp_table_ptr;
+ u32 cp_table_size;
++
++ /* safe mode for updating CG/PG state */
++ bool in_safe_mode;
++ const struct amdgpu_rlc_funcs *funcs;
++
++ /* for firmware data */
++ u32 save_and_restore_offset;
++ u32 clear_state_descriptor_offset;
++ u32 avail_scratch_ram_locations;
++ u32 reg_restore_list_size;
++ u32 reg_list_format_start;
++ u32 reg_list_format_separate_start;
++ u32 starting_offsets_start;
++ u32 reg_list_format_size_bytes;
++ u32 reg_list_size_bytes;
++
++ u32 *register_list_format;
++ u32 *register_restore;
+ };
+
+ struct amdgpu_mec {
+@@ -1109,6 +1138,12 @@ struct amdgpu_gca_config {
+ uint32_t macrotile_mode_array[16];
+ };
+
++struct amdgpu_cu_info {
++ uint32_t number; /* total active CU number */
++ uint32_t ao_cu_mask;
++ uint32_t bitmap[4][4];
++};
++
+ struct amdgpu_gfx {
+ struct mutex gpu_clock_mutex;
+ struct amdgpu_gca_config config;
+@@ -1141,17 +1176,19 @@ struct amdgpu_gfx {
+ struct amdgpu_irq_src priv_reg_irq;
+ struct amdgpu_irq_src priv_inst_irq;
+ /* gfx status */
+- uint32_t gfx_current_status;
++ uint32_t gfx_current_status;
+ /* ce ram size*/
+- unsigned ce_ram_size;
++ unsigned ce_ram_size;
++ struct amdgpu_cu_info cu_info;
+ };
+
+ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned size, struct amdgpu_ib *ib);
+-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
++void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
++ struct fence *f);
+ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ib, struct fence *last_vm_update,
+- struct fence **f);
++ struct amdgpu_job *job, struct fence **f);
+ int amdgpu_ib_pool_init(struct amdgpu_device *adev);
+ void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
+ int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
+@@ -1176,7 +1213,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring);
+ struct amdgpu_cs_chunk {
+ uint32_t chunk_id;
+ uint32_t length_dw;
+- uint32_t *kdata;
++ void *kdata;
+ };
+
+ struct amdgpu_cs_parser {
+@@ -1207,13 +1244,25 @@ struct amdgpu_cs_parser {
+ struct amdgpu_job {
+ struct amd_sched_job base;
+ struct amdgpu_device *adev;
++ struct amdgpu_vm *vm;
+ struct amdgpu_ring *ring;
+ struct amdgpu_sync sync;
+ struct amdgpu_ib *ibs;
+ struct fence *fence; /* the hw fence */
+ uint32_t num_ibs;
+ void *owner;
+- struct amdgpu_user_fence uf;
++ uint64_t ctx;
++ unsigned vm_id;
++ uint64_t vm_pd_addr;
++ uint32_t gds_base, gds_size;
++ uint32_t gws_base, gws_size;
++ uint32_t oa_base, oa_size;
++
++ /* user fence handling */
++ struct amdgpu_bo *uf_bo;
++ uint32_t uf_offset;
++ uint64_t uf_sequence;
++
+ };
+ #define to_amdgpu_job(sched_job) \
+ container_of((sched_job), struct amdgpu_job, base)
+@@ -1605,6 +1654,8 @@ struct amdgpu_uvd {
+ struct amdgpu_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
++ unsigned fw_version;
++ void *saved_bo;
+ unsigned max_handles;
+ atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
+ struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
+@@ -1658,7 +1709,7 @@ struct amdgpu_sdma {
+ struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_irq_src trap_irq;
+ struct amdgpu_irq_src illegal_inst_irq;
+- int num_instances;
++ int num_instances;
+ };
+
+ /*
+@@ -1704,12 +1755,12 @@ static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
+ * Debugfs
+ */
+ struct amdgpu_debugfs {
+- struct drm_info_list *files;
++ const struct drm_info_list *files;
+ unsigned num_files;
+ };
+
+ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
+- struct drm_info_list *files,
++ const struct drm_info_list *files,
+ unsigned nfiles);
+ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
+
+@@ -1751,13 +1802,6 @@ struct amdgpu_allowed_register_entry {
+ bool grbm_indexed;
+ };
+
+-struct amdgpu_cu_info {
+- uint32_t number; /* total active CU number */
+- uint32_t ao_cu_mask;
+- uint32_t bitmap[4][4];
+-};
+-
+-
+ /*
+ * ASIC specific functions.
+ */
+@@ -1775,7 +1819,6 @@ struct amdgpu_asic_funcs {
+ u32 (*get_xclk)(struct amdgpu_device *adev);
+ /* get the gpu clock counter */
+ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+- int (*get_cu_info)(struct amdgpu_device *adev, struct amdgpu_cu_info *info);
+ /* MM block clocks */
+ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
+ int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+@@ -1868,15 +1911,8 @@ struct amdgpu_atcs {
+ /*
+ * CGS
+ */
+-void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+-void amdgpu_cgs_destroy_device(void *cgs_device);
+-
+-
+-/*
+- * CGS
+- */
+-void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+-void amdgpu_cgs_destroy_device(void *cgs_device);
++struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
++void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
+
+
+ /* GPU virtualization */
+@@ -1919,13 +1955,13 @@ struct amdgpu_device {
+ bool shutdown;
+ bool need_dma32;
+ bool accel_working;
+- struct work_struct reset_work;
++ struct work_struct reset_work;
+ struct notifier_block acpi_nb;
+ struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
+ struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
+- unsigned debugfs_count;
++ unsigned debugfs_count;
+ #if defined(CONFIG_DEBUG_FS)
+- struct dentry *debugfs_regs;
++ struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
+ #endif
+ struct amdgpu_atif atif;
+ struct amdgpu_atcs atcs;
+@@ -2050,6 +2086,7 @@ struct amdgpu_device {
+
+ /* tracking pinned memory */
+ u64 vram_pin_size;
++ u64 invisible_pin_size;
+ u64 gart_pin_size;
+
+ /* amdkfd interface */
+@@ -2076,7 +2113,6 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
+ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
+ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
+ bool amdgpu_device_has_dal_support(struct amdgpu_device *adev);
+-
+ /*
+ * Registers read & write functions.
+ */
+@@ -2178,7 +2214,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+ #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
+ #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
+ #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
+-#define amdgpu_asic_get_cu_info(adev, info) (adev)->asic_funcs->get_cu_info((adev), (info))
+ #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
+ #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+ #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+@@ -2190,7 +2225,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+ #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
+ #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
+ #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
+-#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
++#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c))
+ #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
+ #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
+ #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
+@@ -2213,7 +2248,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+ #define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
+ #define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
+ #define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
+-#define amdgpu_display_page_flip(adev, crtc, base) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base))
++#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
+ #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
+ #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
+ #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
+@@ -2306,6 +2341,12 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+ #define amdgpu_dpm_force_clock_level(adev, type, level) \
+ (adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
+
++#define amdgpu_dpm_get_sclk_od(adev) \
++ (adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
++
++#define amdgpu_dpm_set_sclk_od(adev, value) \
++ (adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
++
+ #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \
+ (adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
+
+@@ -2355,7 +2396,7 @@ static inline void amdgpu_unregister_atpx_handler(void) {}
+ * KMS
+ */
+ extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
+-extern int amdgpu_max_kms_ioctl;
++extern const int amdgpu_max_kms_ioctl;
+
+ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
+ int amdgpu_driver_unload_kms(struct drm_device *dev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+index 84b0ce3..6830ed4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+@@ -699,6 +699,36 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
+ return ret;
+ }
+
++union gfx_info {
++ ATOM_GFX_INFO_V2_1 info;
++};
++
++int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev)
++{
++ struct amdgpu_mode_info *mode_info = &adev->mode_info;
++ int index = GetIndexIntoMasterTable(DATA, GFX_Info);
++ uint8_t frev, crev;
++ uint16_t data_offset;
++ int ret = -EINVAL;
++
++ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
++ &frev, &crev, &data_offset)) {
++ union gfx_info *gfx_info = (union gfx_info *)
++ (mode_info->atom_context->bios + data_offset);
++
++ adev->gfx.config.max_shader_engines = gfx_info->info.max_shader_engines;
++ adev->gfx.config.max_tile_pipes = gfx_info->info.max_tile_pipes;
++ adev->gfx.config.max_cu_per_sh = gfx_info->info.max_cu_per_sh;
++ adev->gfx.config.max_sh_per_se = gfx_info->info.max_sh_per_se;
++ adev->gfx.config.max_backends_per_se = gfx_info->info.max_backends_per_se;
++ adev->gfx.config.max_texture_channel_caches =
++ gfx_info->info.max_texture_channel_caches;
++
++ ret = 0;
++ }
++ return ret;
++}
++
+ union igp_info {
+ struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+index 9e14420..8c2e696 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+@@ -144,6 +144,8 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
+
+ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev);
+
++int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev);
++
+ bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id, u32 clock);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+index 7a4b101..3283763 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+@@ -42,7 +42,7 @@ struct amdgpu_cgs_device {
+ struct amdgpu_device *adev = \
+ ((struct amdgpu_cgs_device *)cgs_device)->adev
+
+-static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type,
++static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
+ uint64_t *mc_start, uint64_t *mc_size,
+ uint64_t *mem_size)
+ {
+@@ -73,7 +73,7 @@ static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type,
+ return 0;
+ }
+
+-static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem,
++static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem,
+ uint64_t size,
+ uint64_t min_offset, uint64_t max_offset,
+ cgs_handle_t *kmem_handle, uint64_t *mcaddr)
+@@ -102,7 +102,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem,
+ return ret;
+ }
+
+-static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle)
++static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle)
+ {
+ struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
+
+@@ -118,7 +118,7 @@ static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle)
+ return 0;
+ }
+
+-static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device,
++static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
+ enum cgs_gpu_mem_type type,
+ uint64_t size, uint64_t align,
+ uint64_t min_offset, uint64_t max_offset,
+@@ -208,7 +208,7 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device,
+ return ret;
+ }
+
+-static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle)
++static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
+ {
+ struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
+
+@@ -225,7 +225,7 @@ static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle)
+ return 0;
+ }
+
+-static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle,
++static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
+ uint64_t *mcaddr)
+ {
+ int r;
+@@ -246,7 +246,7 @@ static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle,
+ return r;
+ }
+
+-static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle)
++static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
+ {
+ int r;
+ struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
+@@ -258,7 +258,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle)
+ return r;
+ }
+
+-static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle,
++static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
+ void **map)
+ {
+ int r;
+@@ -271,7 +271,7 @@ static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle,
+ return r;
+ }
+
+-static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle)
++static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
+ {
+ int r;
+ struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
+@@ -283,20 +283,20 @@ static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle)
+ return r;
+ }
+
+-static uint32_t amdgpu_cgs_read_register(void *cgs_device, unsigned offset)
++static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
+ {
+ CGS_FUNC_ADEV;
+ return RREG32(offset);
+ }
+
+-static void amdgpu_cgs_write_register(void *cgs_device, unsigned offset,
++static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset,
+ uint32_t value)
+ {
+ CGS_FUNC_ADEV;
+ WREG32(offset, value);
+ }
+
+-static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device,
++static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
+ enum cgs_ind_reg space,
+ unsigned index)
+ {
+@@ -320,7 +320,7 @@ static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device,
+ return 0;
+ }
+
+-static void amdgpu_cgs_write_ind_register(void *cgs_device,
++static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
+ enum cgs_ind_reg space,
+ unsigned index, uint32_t value)
+ {
+@@ -343,7 +343,7 @@ static void amdgpu_cgs_write_ind_register(void *cgs_device,
+ WARN(1, "Invalid indirect register space");
+ }
+
+-static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr)
++static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr)
+ {
+ CGS_FUNC_ADEV;
+ uint8_t val;
+@@ -353,7 +353,7 @@ static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr)
+ return val;
+ }
+
+-static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr)
++static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr)
+ {
+ CGS_FUNC_ADEV;
+ uint16_t val;
+@@ -363,7 +363,7 @@ static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr)
+ return val;
+ }
+
+-static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device,
++static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device,
+ unsigned addr)
+ {
+ CGS_FUNC_ADEV;
+@@ -374,7 +374,7 @@ static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device,
+ return val;
+ }
+
+-static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr,
++static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr,
+ uint8_t value)
+ {
+ CGS_FUNC_ADEV;
+@@ -382,7 +382,7 @@ static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr,
+ WARN(ret, "pci_write_config_byte error");
+ }
+
+-static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr,
++static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr,
+ uint16_t value)
+ {
+ CGS_FUNC_ADEV;
+@@ -390,7 +390,7 @@ static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr,
+ WARN(ret, "pci_write_config_word error");
+ }
+
+-static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr,
++static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr,
+ uint32_t value)
+ {
+ CGS_FUNC_ADEV;
+@@ -399,7 +399,7 @@ static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr,
+ }
+
+
+-static int amdgpu_cgs_get_pci_resource(void *cgs_device,
++static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
+ enum cgs_resource_type resource_type,
+ uint64_t size,
+ uint64_t offset,
+@@ -433,7 +433,7 @@ static int amdgpu_cgs_get_pci_resource(void *cgs_device,
+ }
+ }
+
+-static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device,
++static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
+ unsigned table, uint16_t *size,
+ uint8_t *frev, uint8_t *crev)
+ {
+@@ -449,7 +449,7 @@ static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device,
+ return NULL;
+ }
+
+-static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table,
++static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
+ uint8_t *frev, uint8_t *crev)
+ {
+ CGS_FUNC_ADEV;
+@@ -462,7 +462,7 @@ static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table,
+ return -EINVAL;
+ }
+
+-static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table,
++static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
+ void *args)
+ {
+ CGS_FUNC_ADEV;
+@@ -471,33 +471,33 @@ static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table,
+ adev->mode_info.atom_context, table, args);
+ }
+
+-static int amdgpu_cgs_create_pm_request(void *cgs_device, cgs_handle_t *request)
++static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request)
+ {
+ /* TODO */
+ return 0;
+ }
+
+-static int amdgpu_cgs_destroy_pm_request(void *cgs_device, cgs_handle_t request)
++static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request)
+ {
+ /* TODO */
+ return 0;
+ }
+
+-static int amdgpu_cgs_set_pm_request(void *cgs_device, cgs_handle_t request,
++static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request,
+ int active)
+ {
+ /* TODO */
+ return 0;
+ }
+
+-static int amdgpu_cgs_pm_request_clock(void *cgs_device, cgs_handle_t request,
++static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request,
+ enum cgs_clock clock, unsigned freq)
+ {
+ /* TODO */
+ return 0;
+ }
+
+-static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request,
++static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request,
+ enum cgs_engine engine, int powered)
+ {
+ /* TODO */
+@@ -506,7 +506,7 @@ static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request,
+
+
+
+-static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device,
++static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device,
+ enum cgs_clock clock,
+ struct cgs_clock_limits *limits)
+ {
+@@ -514,7 +514,7 @@ static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device,
+ return 0;
+ }
+
+-static int amdgpu_cgs_set_camera_voltages(void *cgs_device, uint32_t mask,
++static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask,
+ const uint32_t *voltages)
+ {
+ DRM_ERROR("not implemented");
+@@ -612,7 +612,7 @@ static int amdgpu_cgs_irq_put(void *cgs_device, unsigned src_id, unsigned type)
+ return amdgpu_irq_put(adev, adev->irq.sources[src_id], type);
+ }
+
+-int amdgpu_cgs_set_clockgating_state(void *cgs_device,
++int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state)
+ {
+@@ -633,7 +633,7 @@ int amdgpu_cgs_set_clockgating_state(void *cgs_device,
+ return r;
+ }
+
+-int amdgpu_cgs_set_powergating_state(void *cgs_device,
++int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state)
+ {
+@@ -655,7 +655,7 @@ int amdgpu_cgs_set_powergating_state(void *cgs_device,
+ }
+
+
+-static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type)
++static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
+ {
+ CGS_FUNC_ADEV;
+ enum AMDGPU_UCODE_ID result = AMDGPU_UCODE_ID_MAXIMUM;
+@@ -681,9 +681,10 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type)
+ result = AMDGPU_UCODE_ID_CP_MEC1;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+- if (adev->asic_type == CHIP_TONGA)
++ if (adev->asic_type == CHIP_TONGA || adev->asic_type == CHIP_POLARIS11
++ || adev->asic_type == CHIP_POLARIS10)
+ result = AMDGPU_UCODE_ID_CP_MEC2;
+- else if (adev->asic_type == CHIP_CARRIZO)
++ else
+ result = AMDGPU_UCODE_ID_CP_MEC1;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+@@ -695,13 +696,13 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type)
+ return result;
+ }
+
+-static int amdgpu_cgs_get_firmware_info(void *cgs_device,
++static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
+ enum cgs_ucode_id type,
+ struct cgs_firmware_info *info)
+ {
+ CGS_FUNC_ADEV;
+
+- if (CGS_UCODE_ID_SMU != type) {
++ if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) {
+ uint64_t gpu_addr;
+ uint32_t data_size;
+ const struct gfx_firmware_header_v1_0 *header;
+@@ -734,30 +735,44 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device,
+ const uint8_t *src;
+ const struct smc_firmware_header_v1_0 *hdr;
+
+- switch (adev->asic_type) {
+- case CHIP_TONGA:
+- strcpy(fw_name, "amdgpu/tonga_smc.bin");
+- break;
+- case CHIP_FIJI:
+- strcpy(fw_name, "amdgpu/fiji_smc.bin");
+- break;
+- default:
+- DRM_ERROR("SMC firmware not supported\n");
+- return -EINVAL;
+- }
++ if (!adev->pm.fw) {
++ switch (adev->asic_type) {
++ case CHIP_TONGA:
++ strcpy(fw_name, "amdgpu/tonga_smc.bin");
++ break;
++ case CHIP_FIJI:
++ strcpy(fw_name, "amdgpu/fiji_smc.bin");
++ break;
++ case CHIP_POLARIS11:
++ if (type == CGS_UCODE_ID_SMU)
++ strcpy(fw_name, "amdgpu/polaris11_smc.bin");
++ else if (type == CGS_UCODE_ID_SMU_SK)
++ strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
++ break;
++ case CHIP_POLARIS10:
++ if (type == CGS_UCODE_ID_SMU)
++ strcpy(fw_name, "amdgpu/polaris10_smc.bin");
++ else if (type == CGS_UCODE_ID_SMU_SK)
++ strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
++ break;
++ default:
++ DRM_ERROR("SMC firmware not supported\n");
++ return -EINVAL;
++ }
+
+- err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
+- if (err) {
+- DRM_ERROR("Failed to request firmware\n");
+- return err;
+- }
++ err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
++ if (err) {
++ DRM_ERROR("Failed to request firmware\n");
++ return err;
++ }
+
+- err = amdgpu_ucode_validate(adev->pm.fw);
+- if (err) {
+- DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
+- release_firmware(adev->pm.fw);
+- adev->pm.fw = NULL;
+- return err;
++ err = amdgpu_ucode_validate(adev->pm.fw);
++ if (err) {
++ DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
++ release_firmware(adev->pm.fw);
++ adev->pm.fw = NULL;
++ return err;
++ }
+ }
+
+ hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
+@@ -774,7 +789,7 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device,
+ return 0;
+ }
+
+-static int amdgpu_cgs_query_system_info(void *cgs_device,
++static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
+ struct cgs_system_info *sys_info)
+ {
+ CGS_FUNC_ADEV;
+@@ -801,6 +816,9 @@ static int amdgpu_cgs_query_system_info(void *cgs_device,
+ case CGS_SYSTEM_INFO_PG_FLAGS:
+ sys_info->value = adev->pg_flags;
+ break;
++ case CGS_SYSTEM_INFO_GFX_CU_INFO:
++ sys_info->value = adev->gfx.cu_info.number;
++ break;
+ default:
+ return -ENODEV;
+ }
+@@ -808,7 +826,7 @@ static int amdgpu_cgs_query_system_info(void *cgs_device,
+ return 0;
+ }
+
+-static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
++static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
+ struct cgs_display_info *info)
+ {
+ CGS_FUNC_ADEV;
+@@ -816,10 +834,13 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
+ struct drm_device *ddev = adev->ddev;
+ struct drm_crtc *crtc;
+ uint32_t line_time_us, vblank_lines;
++ struct cgs_mode_info *mode_info;
+
+ if (info == NULL)
+ return -EINVAL;
+
++ mode_info = info->mode_info;
++
+ if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
+ list_for_each_entry(crtc,
+ &ddev->mode_config.crtc_list, head) {
+@@ -828,7 +849,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
+ info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
+ info->display_count++;
+ }
+- if (info->mode_info != NULL &&
++ if (mode_info != NULL &&
+ crtc->enabled && amdgpu_crtc->enabled &&
+ amdgpu_crtc->hw_mode.clock) {
+ line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
+@@ -836,10 +857,10 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
+ vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
+ amdgpu_crtc->hw_mode.crtc_vdisplay +
+ (amdgpu_crtc->v_border * 2);
+- info->mode_info->vblank_time_us = vblank_lines * line_time_us;
+- info->mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+- info->mode_info->ref_clock = adev->clock.spll.reference_freq;
+- info->mode_info++;
++ mode_info->vblank_time_us = vblank_lines * line_time_us;
++ mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
++ mode_info->ref_clock = adev->clock.spll.reference_freq;
++ mode_info = NULL;
+ }
+ }
+ }
+@@ -847,6 +868,16 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
+ return 0;
+ }
+
++
++static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
++{
++ CGS_FUNC_ADEV;
++
++ adev->pm.dpm_enabled = enabled;
++
++ return 0;
++}
++
+ /** \brief evaluate acpi namespace object, handle or pathname must be valid
+ * \param cgs_device
+ * \param info input/output arguments for the control method
+@@ -854,7 +885,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
+ */
+
+ #if defined(CONFIG_ACPI)
+-static int amdgpu_cgs_acpi_eval_object(void *cgs_device,
++static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
+ struct cgs_acpi_method_info *info)
+ {
+ CGS_FUNC_ADEV;
+@@ -1017,14 +1048,14 @@ error:
+ return result;
+ }
+ #else
+-static int amdgpu_cgs_acpi_eval_object(void *cgs_device,
++static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
+ struct cgs_acpi_method_info *info)
+ {
+ return -EIO;
+ }
+ #endif
+
+-int amdgpu_cgs_call_acpi_method(void *cgs_device,
++int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
+ uint32_t acpi_method,
+ uint32_t acpi_function,
+ void *pinput, void *poutput,
+@@ -1097,6 +1128,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
+ amdgpu_cgs_set_powergating_state,
+ amdgpu_cgs_set_clockgating_state,
+ amdgpu_cgs_get_active_displays_info,
++ amdgpu_cgs_notify_dpm_enabled,
+ amdgpu_cgs_call_acpi_method,
+ amdgpu_cgs_query_system_info,
+ };
+@@ -1107,7 +1139,7 @@ static const struct cgs_os_ops amdgpu_cgs_os_ops = {
+ amdgpu_cgs_irq_put
+ };
+
+-void *amdgpu_cgs_create_device(struct amdgpu_device *adev)
++struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
+ {
+ struct amdgpu_cgs_device *cgs_device =
+ kmalloc(sizeof(*cgs_device), GFP_KERNEL);
+@@ -1121,10 +1153,10 @@ void *amdgpu_cgs_create_device(struct amdgpu_device *adev)
+ cgs_device->base.os_ops = &amdgpu_cgs_os_ops;
+ cgs_device->adev = adev;
+
+- return cgs_device;
++ return (struct cgs_device *)cgs_device;
+ }
+
+-void amdgpu_cgs_destroy_device(void *cgs_device)
++void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device)
+ {
+ kfree(cgs_device);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index d7e0b0b..2bbeeb0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -24,7 +24,6 @@
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ */
+-#include <linux/list_sort.h>
+ #include <linux/pagemap.h>
+ #include <drm/drmP.h>
+ #include <drm/amdgpu_drm.h>
+@@ -88,44 +87,42 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
+ }
+
+ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
+- struct amdgpu_user_fence *uf,
+- struct drm_amdgpu_cs_chunk_fence *fence_data)
++ struct drm_amdgpu_cs_chunk_fence *data,
++ uint32_t *offset)
+ {
+ struct drm_gem_object *gobj;
+- uint32_t handle;
+
+- handle = fence_data->handle;
+ gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
+- fence_data->handle);
++ data->handle);
+ if (gobj == NULL)
+ return -EINVAL;
+
+- uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+- uf->offset = fence_data->offset;
+-
+- if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
+- drm_gem_object_unreference_unlocked(gobj);
+- return -EINVAL;
+- }
+-
+- p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
++ p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ p->uf_entry.priority = 0;
+ p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+ p->uf_entry.tv.shared = true;
+ p->uf_entry.user_pages = NULL;
++ *offset = data->offset;
+
+ drm_gem_object_unreference_unlocked(gobj);
++
++ if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
++ amdgpu_bo_unref(&p->uf_entry.robj);
++ return -EINVAL;
++ }
++
+ return 0;
+ }
+
+ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+ {
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
++ struct amdgpu_vm *vm = &fpriv->vm;
+ union drm_amdgpu_cs *cs = data;
+ uint64_t *chunk_array_user;
+ uint64_t *chunk_array;
+- struct amdgpu_user_fence uf = {};
+ unsigned size, num_ibs = 0;
++ uint32_t uf_offset = 0;
+ int i;
+ int ret;
+
+@@ -200,7 +197,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+ goto free_partial_kdata;
+ }
+
+- ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
++ ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
++ &uf_offset);
+ if (ret)
+ goto free_partial_kdata;
+
+@@ -215,11 +213,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+ }
+ }
+
+- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
++ ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
+ if (ret)
+ goto free_all_kdata;
+
+- p->job->uf = uf;
++ if (p->uf_entry.robj) {
++ p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
++ p->job->uf_offset = uf_offset;
++ }
+
+ kfree(chunk_array);
+ return 0;
+@@ -377,7 +378,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+ INIT_LIST_HEAD(&duplicates);
+ amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
+
+- if (p->job->uf.bo)
++ if (p->uf_entry.robj)
+ list_add(&p->uf_entry.tv.head, &p->validated);
+
+ if (need_mmap_lock)
+@@ -473,6 +474,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+ goto error_validate;
+
+ if (p->bo_list) {
++ struct amdgpu_bo *gds = p->bo_list->gds_obj;
++ struct amdgpu_bo *gws = p->bo_list->gws_obj;
++ struct amdgpu_bo *oa = p->bo_list->oa_obj;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ unsigned i;
+
+@@ -481,6 +485,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+
+ p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
+ }
++
++ if (gds) {
++ p->job->gds_base = amdgpu_bo_gpu_offset(gds);
++ p->job->gds_size = amdgpu_bo_size(gds);
++ }
++ if (gws) {
++ p->job->gws_base = amdgpu_bo_gpu_offset(gws);
++ p->job->gws_size = amdgpu_bo_size(gws);
++ }
++ if (oa) {
++ p->job->oa_base = amdgpu_bo_gpu_offset(oa);
++ p->job->oa_size = amdgpu_bo_size(oa);
++ }
+ }
+
+ error_validate:
+@@ -527,16 +544,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
+ return 0;
+ }
+
+-static int cmp_size_smaller_first(void *priv, struct list_head *a,
+- struct list_head *b)
+-{
+- struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
+- struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
+-
+- /* Sort A before B if A is smaller. */
+- return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
+-}
+-
+ /**
+ * cs_parser_fini() - clean parser states
+ * @parser: parser structure holding parsing context.
+@@ -553,18 +560,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
+ if (!error) {
+ amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
+- /* Sort the buffer list from the smallest to largest buffer,
+- * which affects the order of buffers in the LRU list.
+- * This assures that the smallest buffers are added first
+- * to the LRU list, so they are likely to be later evicted
+- * first, instead of large buffers whose eviction is more
+- * expensive.
+- *
+- * This slightly lowers the number of bytes moved by TTM
+- * per frame under memory pressure.
+- */
+- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+-
+ ttm_eu_fence_buffer_objects(&parser->ticket,
+ &parser->validated,
+ parser->fence);
+@@ -763,41 +758,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
+
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+- ib->ctx = parser->ctx;
+ j++;
+ }
+
+- /* add GDS resources to first IB */
+- if (parser->bo_list) {
+- struct amdgpu_bo *gds = parser->bo_list->gds_obj;
+- struct amdgpu_bo *gws = parser->bo_list->gws_obj;
+- struct amdgpu_bo *oa = parser->bo_list->oa_obj;
+- struct amdgpu_ib *ib = &parser->job->ibs[0];
+-
+- if (gds) {
+- ib->gds_base = amdgpu_bo_gpu_offset(gds);
+- ib->gds_size = amdgpu_bo_size(gds);
+- }
+- if (gws) {
+- ib->gws_base = amdgpu_bo_gpu_offset(gws);
+- ib->gws_size = amdgpu_bo_size(gws);
+- }
+- if (oa) {
+- ib->oa_base = amdgpu_bo_gpu_offset(oa);
+- ib->oa_size = amdgpu_bo_size(oa);
+- }
+- }
+- /* wrap the last IB with user fence */
+- if (parser->job->uf.bo) {
+- struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
+-
+- /* UVD & VCE fw doesn't support user fences */
+- if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
+- parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
+- return -EINVAL;
+-
+- ib->user = &parser->job->uf;
+- }
++ /* UVD & VCE fw doesn't support user fences */
++ if (parser->job->uf_bo && (
++ parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
++ parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
++ return -EINVAL;
+
+ return 0;
+ }
+@@ -862,6 +830,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+ {
+ struct amdgpu_ring *ring = p->job->ring;
++ struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+ struct fence *fence;
+ struct amdgpu_job *job;
+ int r;
+@@ -870,19 +839,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+ p->job = NULL;
+
+ r = amd_sched_job_init(&job->base, &ring->sched,
+- &p->ctx->rings[ring->idx].entity,
+- amdgpu_job_timeout_func,
+- amdgpu_job_free_func,
+- p->filp, &fence);
++ entity, amdgpu_job_timeout_func,
++ amdgpu_job_free_func,
++ p->filp, &fence);
+ if (r) {
+ amdgpu_job_free(job);
+ return r;
+ }
+
+ job->owner = p->filp;
++ job->ctx = entity->fence_context;
+ p->fence = fence_get(fence);
+ cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+- job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
++ job->uf_sequence = cs->out.handle;
+
+ trace_amdgpu_cs_ioctl(job);
+ amd_sched_entity_push_job(&job->base);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 6e38497..04d5a38 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -59,15 +59,11 @@ static const char *amdgpu_asic_name[] = {
+ "FIJI",
+ "CARRIZO",
+ "STONEY",
++ "POLARIS10",
++ "POLARIS11",
+ "LAST",
+ };
+
+-#if defined(CONFIG_VGA_SWITCHEROO)
+-bool amdgpu_has_atpx_dgpu_power_cntl(void);
+-#else
+-static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+-#endif
+-
+ bool amdgpu_device_is_px(struct drm_device *dev)
+ {
+ struct amdgpu_device *adev = dev->dev_private;
+@@ -352,7 +348,7 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev)
+ adev->doorbell.base = pci_resource_start(adev->pdev, 2);
+ adev->doorbell.size = pci_resource_len(adev->pdev, 2);
+
+- adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
++ adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
+ AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
+ if (adev->doorbell.num_doorbells == 0)
+ return -EINVAL;
+@@ -942,15 +938,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
+ }
+
+ if (amdgpu_gart_size != -1) {
+- /* gtt size must be power of two and greater or equal to 32M */
++ /* gtt size must be greater or equal to 32M */
+ if (amdgpu_gart_size < 32) {
+ dev_warn(adev->dev, "gart size (%d) too small\n",
+ amdgpu_gart_size);
+ amdgpu_gart_size = -1;
+- } else if (!amdgpu_check_pot_argument(amdgpu_gart_size)) {
+- dev_warn(adev->dev, "gart size (%d) must be a power of 2\n",
+- amdgpu_gart_size);
+- amdgpu_gart_size = -1;
+ }
+ }
+
+@@ -1150,6 +1142,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
++ case CHIP_POLARIS11:
++ case CHIP_POLARIS10:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
+@@ -1202,7 +1196,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
+ if (r == -ENOENT) {
+ adev->ip_block_status[i].valid = false;
+ } else if (r) {
+- DRM_ERROR("early_init %d failed %d\n", i, r);
++ DRM_ERROR("early_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ } else {
+ adev->ip_block_status[i].valid = true;
+@@ -1225,7 +1219,7 @@ static int amdgpu_init(struct amdgpu_device *adev)
+ continue;
+ r = adev->ip_blocks[i].funcs->sw_init((void *)adev);
+ if (r) {
+- DRM_ERROR("sw_init %d failed %d\n", i, r);
++ DRM_ERROR("sw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ }
+ adev->ip_block_status[i].sw = true;
+@@ -1258,7 +1252,7 @@ static int amdgpu_init(struct amdgpu_device *adev)
+ continue;
+ r = adev->ip_blocks[i].funcs->hw_init((void *)adev);
+ if (r) {
+- DRM_ERROR("hw_init %d failed %d\n", i, r);
++ DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ }
+ adev->ip_block_status[i].hw = true;
+@@ -1278,13 +1272,13 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
+ r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
+ AMD_CG_STATE_GATE);
+ if (r) {
+- DRM_ERROR("set_clockgating_state(gate) %d failed %d\n", i, r);
++ DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ }
+ if (adev->ip_blocks[i].funcs->late_init) {
+ r = adev->ip_blocks[i].funcs->late_init((void *)adev);
+ if (r) {
+- DRM_ERROR("late_init %d failed %d\n", i, r);
++ DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ }
+ }
+@@ -1308,13 +1302,13 @@ static int amdgpu_fini(struct amdgpu_device *adev)
+ r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
+ AMD_CG_STATE_UNGATE);
+ if (r) {
+- DRM_ERROR("set_clockgating_state(ungate) %d failed %d\n", i, r);
++ DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ }
+ r = adev->ip_blocks[i].funcs->hw_fini((void *)adev);
+ /* XXX handle errors */
+ if (r) {
+- DRM_DEBUG("hw_fini %d failed %d\n", i, r);
++ DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ }
+ adev->ip_block_status[i].hw = false;
+ }
+@@ -1325,7 +1319,7 @@ static int amdgpu_fini(struct amdgpu_device *adev)
+ r = adev->ip_blocks[i].funcs->sw_fini((void *)adev);
+ /* XXX handle errors */
+ if (r) {
+- DRM_DEBUG("sw_fini %d failed %d\n", i, r);
++ DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ }
+ adev->ip_block_status[i].sw = false;
+ adev->ip_block_status[i].valid = false;
+@@ -1338,20 +1332,29 @@ static int amdgpu_suspend(struct amdgpu_device *adev)
+ {
+ int i, r;
+
++ /* ungate SMC block first */
++ r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
++ AMD_CG_STATE_UNGATE);
++ if (r) {
++ DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n",r);
++ }
++
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_block_status[i].valid)
+ continue;
+ /* ungate blocks so that suspend can properly shut them down */
+- r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
+- AMD_CG_STATE_UNGATE);
+- if (r) {
+- DRM_ERROR("set_clockgating_state(ungate) %d failed %d\n", i, r);
++ if (i != AMD_IP_BLOCK_TYPE_SMC) {
++ r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
++ AMD_CG_STATE_UNGATE);
++ if (r) {
++ DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
++ }
+ }
+ /* XXX handle errors */
+ r = adev->ip_blocks[i].funcs->suspend(adev);
+ /* XXX handle errors */
+ if (r) {
+- DRM_ERROR("suspend %d failed %d\n", i, r);
++ DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ }
+ }
+
+@@ -1367,7 +1370,7 @@ static int amdgpu_resume(struct amdgpu_device *adev)
+ continue;
+ r = adev->ip_blocks[i].funcs->resume(adev);
+ if (r) {
+- DRM_ERROR("resume %d failed %d\n", i, r);
++ DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+ return r;
+ }
+ }
+@@ -1391,9 +1394,11 @@ bool amdgpu_device_has_dal_support(struct amdgpu_device *adev)
+ case CHIP_HAWAII:
+ return amdgpu_dal != 0;
+ #endif
+-#if defined(CONFIG_DRM_AMD_DAL) && defined(CONFIG_DRM_AMD_DAL_DCE11_0)
++#if defined(CONFIG_DRM_AMD_DAL) && (defined(CONFIG_DRM_AMD_DAL_DCE11_0) || defined(CONFIG_DRM_AMD_DAL_DCE11_2))
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
++ case CHIP_POLARIS11:
++ case CHIP_POLARIS10:
+ return amdgpu_dal != 0;
+ #endif
+ #if defined(CONFIG_DRM_AMD_DAL) && defined(CONFIG_DRM_AMD_DAL_DCE10_0)
+@@ -1517,7 +1522,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+
+ if (amdgpu_runtime_pm == 1)
+ runtime = true;
+- if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
++ if (amdgpu_device_is_px(ddev))
+ runtime = true;
+ vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
+ if (runtime)
+@@ -1812,6 +1817,9 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
+ if (r)
+ DRM_ERROR("amdgpu_resume failed (%d).\n", r);
+
++ if (r)
++ DRM_ERROR("amdgpu_resume failed (%d).\n", r);
++
+ amdgpu_fence_driver_resume(adev);
+
+ if (resume) {
+@@ -2066,7 +2074,7 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev)
+ * Debugfs
+ */
+ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
+- struct drm_info_list *files,
++ const struct drm_info_list *files,
+ unsigned nfiles)
+ {
+ unsigned i;
+@@ -2178,32 +2186,246 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
+ return result;
+ }
+
++static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
++ size_t size, loff_t *pos)
++{
++ struct amdgpu_device *adev = f->f_inode->i_private;
++ ssize_t result = 0;
++ int r;
++
++ if (size & 0x3 || *pos & 0x3)
++ return -EINVAL;
++
++ while (size) {
++ uint32_t value;
++
++ value = RREG32_PCIE(*pos >> 2);
++ r = put_user(value, (uint32_t *)buf);
++ if (r)
++ return r;
++
++ result += 4;
++ buf += 4;
++ *pos += 4;
++ size -= 4;
++ }
++
++ return result;
++}
++
++static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
++ size_t size, loff_t *pos)
++{
++ struct amdgpu_device *adev = f->f_inode->i_private;
++ ssize_t result = 0;
++ int r;
++
++ if (size & 0x3 || *pos & 0x3)
++ return -EINVAL;
++
++ while (size) {
++ uint32_t value;
++
++ r = get_user(value, (uint32_t *)buf);
++ if (r)
++ return r;
++
++ WREG32_PCIE(*pos >> 2, value);
++
++ result += 4;
++ buf += 4;
++ *pos += 4;
++ size -= 4;
++ }
++
++ return result;
++}
++
++static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
++ size_t size, loff_t *pos)
++{
++ struct amdgpu_device *adev = f->f_inode->i_private;
++ ssize_t result = 0;
++ int r;
++
++ if (size & 0x3 || *pos & 0x3)
++ return -EINVAL;
++
++ while (size) {
++ uint32_t value;
++
++ value = RREG32_DIDT(*pos >> 2);
++ r = put_user(value, (uint32_t *)buf);
++ if (r)
++ return r;
++
++ result += 4;
++ buf += 4;
++ *pos += 4;
++ size -= 4;
++ }
++
++ return result;
++}
++
++static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
++ size_t size, loff_t *pos)
++{
++ struct amdgpu_device *adev = f->f_inode->i_private;
++ ssize_t result = 0;
++ int r;
++
++ if (size & 0x3 || *pos & 0x3)
++ return -EINVAL;
++
++ while (size) {
++ uint32_t value;
++
++ r = get_user(value, (uint32_t *)buf);
++ if (r)
++ return r;
++
++ WREG32_DIDT(*pos >> 2, value);
++
++ result += 4;
++ buf += 4;
++ *pos += 4;
++ size -= 4;
++ }
++
++ return result;
++}
++
++static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
++ size_t size, loff_t *pos)
++{
++ struct amdgpu_device *adev = f->f_inode->i_private;
++ ssize_t result = 0;
++ int r;
++
++ if (size & 0x3 || *pos & 0x3)
++ return -EINVAL;
++
++ while (size) {
++ uint32_t value;
++
++ value = RREG32_SMC(*pos >> 2);
++ r = put_user(value, (uint32_t *)buf);
++ if (r)
++ return r;
++
++ result += 4;
++ buf += 4;
++ *pos += 4;
++ size -= 4;
++ }
++
++ return result;
++}
++
++static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
++ size_t size, loff_t *pos)
++{
++ struct amdgpu_device *adev = f->f_inode->i_private;
++ ssize_t result = 0;
++ int r;
++
++ if (size & 0x3 || *pos & 0x3)
++ return -EINVAL;
++
++ while (size) {
++ uint32_t value;
++
++ r = get_user(value, (uint32_t *)buf);
++ if (r)
++ return r;
++
++ WREG32_SMC(*pos >> 2, value);
++
++ result += 4;
++ buf += 4;
++ *pos += 4;
++ size -= 4;
++ }
++
++ return result;
++}
++
+ static const struct file_operations amdgpu_debugfs_regs_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_read,
+ .write = amdgpu_debugfs_regs_write,
+ .llseek = default_llseek
+ };
++static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
++ .owner = THIS_MODULE,
++ .read = amdgpu_debugfs_regs_didt_read,
++ .write = amdgpu_debugfs_regs_didt_write,
++ .llseek = default_llseek
++};
++static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
++ .owner = THIS_MODULE,
++ .read = amdgpu_debugfs_regs_pcie_read,
++ .write = amdgpu_debugfs_regs_pcie_write,
++ .llseek = default_llseek
++};
++static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
++ .owner = THIS_MODULE,
++ .read = amdgpu_debugfs_regs_smc_read,
++ .write = amdgpu_debugfs_regs_smc_write,
++ .llseek = default_llseek
++};
++
++static const struct file_operations *debugfs_regs[] = {
++ &amdgpu_debugfs_regs_fops,
++ &amdgpu_debugfs_regs_didt_fops,
++ &amdgpu_debugfs_regs_pcie_fops,
++ &amdgpu_debugfs_regs_smc_fops,
++};
++
++static const char *debugfs_regs_names[] = {
++ "amdgpu_regs",
++ "amdgpu_regs_didt",
++ "amdgpu_regs_pcie",
++ "amdgpu_regs_smc",
++};
+
+ static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+ {
+ struct drm_minor *minor = adev->ddev->primary;
+ struct dentry *ent, *root = minor->debugfs_root;
++ unsigned i, j;
++
++ for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
++ ent = debugfs_create_file(debugfs_regs_names[i],
++ S_IFREG | S_IRUGO, root,
++ adev, debugfs_regs[i]);
++ if (IS_ERR(ent)) { /* unwind files created so far */
++ for (j = 0; j < i; j++) {
++ debugfs_remove(adev->debugfs_regs[j]);
++ adev->debugfs_regs[j] = NULL;
++ }
++ return PTR_ERR(ent);
++ }
+
+- ent = debugfs_create_file("amdgpu_regs", S_IFREG | S_IRUGO, root,
+- adev, &amdgpu_debugfs_regs_fops);
+- if (IS_ERR(ent))
+- return PTR_ERR(ent);
+- i_size_write(ent->d_inode, adev->rmmio_size);
+- adev->debugfs_regs = ent;
++ if (!i)
++ i_size_write(ent->d_inode, adev->rmmio_size);
++ adev->debugfs_regs[i] = ent;
++ }
+
+ return 0;
+ }
+
+ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
+ {
+- debugfs_remove(adev->debugfs_regs);
+- adev->debugfs_regs = NULL;
++ unsigned i;
++
++ for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
++ if (adev->debugfs_regs[i]) {
++ debugfs_remove(adev->debugfs_regs[i]);
++ adev->debugfs_regs[i] = NULL;
++ }
++ }
+ }
+
+ int amdgpu_debugfs_init(struct drm_minor *minor)
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 604ed4d..f949be1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -50,9 +50,11 @@
+ * KMS wrapper.
+ * - 3.0.0 - initial driver
+ * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
++ * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
++ * at the end of IBs.
+ */
+ #define KMS_DRIVER_MAJOR 3
+-#define KMS_DRIVER_MINOR 1
++#define KMS_DRIVER_MINOR 2
+ #define KMS_DRIVER_PATCHLEVEL 0
+
+ int amdgpu_vram_limit = 0;
+@@ -170,7 +172,7 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
+ module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+
+-static struct pci_device_id pciidlist[] = {
++static const struct pci_device_id pciidlist[] = {
+ #ifdef CONFIG_DRM_AMDGPU_CIK
+ /* Kaveri */
+ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+@@ -281,6 +283,28 @@ static struct pci_device_id pciidlist[] = {
+ {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ /* stoney */
+ {0x1002, 0x98E4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_STONEY|AMD_IS_APU},
++ /* Polaris11 */
++ {0x1002, 0x67E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ {0x1002, 0x67E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
++ /* Polaris10 */
++ {0x1002, 0x67C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
++ {0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+
+ {0, 0, 0}
+ };
+@@ -322,6 +346,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ return -ENODEV;
+ }
+
++ /*
++ * Initialize amdkfd before starting amdgpu. If it was not loaded yet,
++ * defer amdgpu probing
++ */
++ ret = amdgpu_amdkfd_init();
++ if (ret == -EPROBE_DEFER)
++ return ret;
++
+ /* Get rid of things like offb */
+ ret = amdgpu_kick_out_firmware_fb(pdev);
+ if (ret)
+@@ -549,9 +581,12 @@ static struct pci_driver amdgpu_kms_pci_driver = {
+ .driver.pm = &amdgpu_pm_ops,
+ };
+
++
++
+ static int __init amdgpu_init(void)
+ {
+ amdgpu_sync_init();
++ amdgpu_fence_slab_init();
+ #ifdef CONFIG_VGA_CONSOLE
+ if (vgacon_text_force()) {
+ DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
+@@ -564,9 +599,6 @@ static int __init amdgpu_init(void)
+ driver->driver_features |= DRIVER_MODESET;
+ driver->num_ioctls = amdgpu_max_kms_ioctl;
+ amdgpu_register_atpx_handler();
+-
+- amdgpu_amdkfd_init();
+-
+ /* let modprobe override vga console setting */
+ return drm_pci_init(driver, pdriver);
+ }
+@@ -577,6 +609,7 @@ static void __exit amdgpu_exit(void)
+ drm_pci_exit(driver, pdriver);
+ amdgpu_unregister_atpx_handler();
+ amdgpu_sync_fini();
++ amdgpu_fence_slab_fini();
+ }
+
+ module_init(amdgpu_init);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index 4303b44..2b89db4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -55,8 +55,21 @@ struct amdgpu_fence {
+ };
+
+ static struct kmem_cache *amdgpu_fence_slab;
+-static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+
++int amdgpu_fence_slab_init(void)
++{
++ amdgpu_fence_slab = kmem_cache_create(
++ "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
++ SLAB_HWCACHE_ALIGN, NULL);
++ if (!amdgpu_fence_slab)
++ return -ENOMEM;
++ return 0;
++}
++
++void amdgpu_fence_slab_fini(void)
++{
++ kmem_cache_destroy(amdgpu_fence_slab);
++}
+ /*
+ * Cast helper
+ */
+@@ -121,7 +134,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+ {
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_fence *fence;
+- struct fence **ptr;
++ struct fence *old, **ptr;
+ uint32_t seq;
+
+ fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+@@ -141,7 +154,11 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+- BUG_ON(rcu_dereference_protected(*ptr, 1));
++ old = rcu_dereference_protected(*ptr, 1);
++ if (old && !fence_is_signaled(old)) {
++ DRM_INFO("rcu slot is busy\n");
++ fence_wait(old, false);
++ }
+
+ rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
+@@ -348,9 +365,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+ (unsigned long)ring);
+
+- ring->fence_drv.num_fences_mask = num_hw_submission - 1;
++ ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+- ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
++ ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+ GFP_KERNEL);
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
+@@ -392,13 +409,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ */
+ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
+ {
+- if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
+- amdgpu_fence_slab = kmem_cache_create(
+- "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
+- SLAB_HWCACHE_ALIGN, NULL);
+- if (!amdgpu_fence_slab)
+- return -ENOMEM;
+- }
+ if (amdgpu_debugfs_fence_init(adev))
+ dev_err(adev->dev, "fence debugfs file creation failed\n");
+
+@@ -437,9 +447,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
+ kfree(ring->fence_drv.fences);
+ ring->fence_drv.initialized = false;
+ }
+-
+- if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+- kmem_cache_destroy(amdgpu_fence_slab);
+ }
+
+ /**
+@@ -635,7 +642,7 @@ static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
+ return 0;
+ }
+
+-static struct drm_info_list amdgpu_debugfs_fence_list[] = {
++static const struct drm_info_list amdgpu_debugfs_fence_list[] = {
+ {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
+ {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+index 7312d72..921bce2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+@@ -238,18 +238,17 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+ t = offset / AMDGPU_GPU_PAGE_SIZE;
+ p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ for (i = 0; i < pages; i++, p++) {
+- if (adev->gart.pages[p]) {
+- adev->gart.pages[p] = NULL;
+- adev->gart.pages_addr[p] = adev->dummy_page.addr;
+- page_base = adev->gart.pages_addr[p];
+- if (!adev->gart.ptr)
+- continue;
++#ifdef CONFIG_AMDGPU_GART_DEBUGFS
++ adev->gart.pages[p] = NULL;
++#endif
++ page_base = adev->dummy_page.addr;
++ if (!adev->gart.ptr)
++ continue;
+
+- for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+- amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
+- t, page_base, flags);
+- page_base += AMDGPU_GPU_PAGE_SIZE;
+- }
++ for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
++ amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
++ t, page_base, flags);
++ page_base += AMDGPU_GPU_PAGE_SIZE;
+ }
+ }
+ mb();
+@@ -287,10 +286,11 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+ p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+
+ for (i = 0; i < pages; i++, p++) {
+- adev->gart.pages_addr[p] = dma_addr[i];
++#ifdef CONFIG_AMDGPU_GART_DEBUGFS
+ adev->gart.pages[p] = pagelist[i];
++#endif
+ if (adev->gart.ptr) {
+- page_base = adev->gart.pages_addr[p];
++ page_base = dma_addr[i];
+ for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+ amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags);
+ page_base += AMDGPU_GPU_PAGE_SIZE;
+@@ -312,11 +312,11 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+ */
+ int amdgpu_gart_init(struct amdgpu_device *adev)
+ {
+- int r, i;
++ int r;
+
+- if (adev->gart.pages) {
++ if (adev->dummy_page.page)
+ return 0;
+- }
++
+ /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
+ if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("Page size is smaller than GPU page size!\n");
+@@ -330,22 +330,16 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
+ adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE;
+ DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
+ adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
++
++#ifdef CONFIG_AMDGPU_GART_DEBUGFS
+ /* Allocate pages table */
+ adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages);
+ if (adev->gart.pages == NULL) {
+ amdgpu_gart_fini(adev);
+ return -ENOMEM;
+ }
+- adev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
+- adev->gart.num_cpu_pages);
+- if (adev->gart.pages_addr == NULL) {
+- amdgpu_gart_fini(adev);
+- return -ENOMEM;
+- }
+- /* set GART entry to point to the dummy page by default */
+- for (i = 0; i < adev->gart.num_cpu_pages; i++) {
+- adev->gart.pages_addr[i] = adev->dummy_page.addr;
+- }
++#endif
++
+ return 0;
+ }
+
+@@ -358,15 +352,14 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
+ */
+ void amdgpu_gart_fini(struct amdgpu_device *adev)
+ {
+- if (adev->gart.pages && adev->gart.pages_addr && adev->gart.ready) {
++ if (adev->gart.ready) {
+ /* unbind pages */
+ amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages);
+ }
+ adev->gart.ready = false;
++#ifdef CONFIG_AMDGPU_GART_DEBUGFS
+ vfree(adev->gart.pages);
+- vfree(adev->gart.pages_addr);
+ adev->gart.pages = NULL;
+- adev->gart.pages_addr = NULL;
+-
++#endif
+ amdgpu_dummy_page_fini(adev);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+index c3f4e85..503d540 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+@@ -43,7 +43,7 @@ struct amdgpu_ring;
+ struct amdgpu_bo;
+
+ struct amdgpu_gds_asic_info {
+- uint32_t total_size;
++ uint32_t total_size;
+ uint32_t gfx_partition_size;
+ uint32_t cs_partition_size;
+ };
+@@ -52,8 +52,8 @@ struct amdgpu_gds {
+ struct amdgpu_gds_asic_info mem;
+ struct amdgpu_gds_asic_info gws;
+ struct amdgpu_gds_asic_info oa;
+- /* At present, GDS, GWS and OA resources for gfx (graphics)
+- * is always pre-allocated and available for graphics operation.
++ /* At present, GDS, GWS and OA resources for gfx (graphics)
++ * is always pre-allocated and available for graphics operation.
+ * Such resource is shared between all gfx clients.
+ * TODO: move this operation to user space
+ * */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+index 3f8997a..0635bb6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+@@ -141,25 +141,40 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
+ void amdgpu_gem_object_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv)
+ {
+- struct amdgpu_bo *rbo = gem_to_amdgpu_bo(obj);
+- struct amdgpu_device *adev = rbo->adev;
++ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
++ struct amdgpu_device *adev = bo->adev;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
++
++ struct amdgpu_bo_list_entry vm_pd;
++ struct list_head list, duplicates;
++ struct ttm_validate_buffer tv;
++ struct ww_acquire_ctx ticket;
+ struct amdgpu_bo_va *bo_va;
+ int r;
+- r = amdgpu_bo_reserve(rbo, true);
++
++ INIT_LIST_HEAD(&list);
++ INIT_LIST_HEAD(&duplicates);
++
++ tv.bo = &bo->tbo;
++ tv.shared = true;
++ list_add(&tv.head, &list);
++
++ amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
++
++ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+ if (r) {
+ dev_err(adev->dev, "leaking bo va because "
+ "we fail to reserve bo (%d)\n", r);
+ return;
+ }
+- bo_va = amdgpu_vm_bo_find(vm, rbo);
++ bo_va = amdgpu_vm_bo_find(vm, bo);
+ if (bo_va) {
+ if (--bo_va->ref_count == 0) {
+ amdgpu_vm_bo_rmv(adev, bo_va);
+ }
+ }
+- amdgpu_bo_unreserve(rbo);
++ ttm_eu_backoff_reservation(&ticket, &list);
+ }
+
+ static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
+@@ -579,11 +594,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+- if (args->operation == AMDGPU_VA_OP_MAP) {
+- tv_pd.bo = &fpriv->vm.page_directory->tbo;
+- tv_pd.shared = true;
+- list_add(&tv_pd.head, &list);
+- }
++ tv_pd.bo = &fpriv->vm.page_directory->tbo;
++ tv_pd.shared = true;
++ list_add(&tv_pd.head, &list);
++
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+ if (r) {
+ drm_gem_object_unreference_unlocked(gobj);
+@@ -783,7 +797,7 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
+ return 0;
+ }
+
+-static struct drm_info_list amdgpu_debugfs_gem_list[] = {
++static const struct drm_info_list amdgpu_debugfs_gem_list[] = {
+ {"amdgpu_gem_info", &amdgpu_debugfs_gem_info, 0, NULL},
+ };
+ #endif
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+index 644336d..34e3542 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+@@ -74,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ }
+
+- ib->vm = vm;
+- ib->vm_id = 0;
+-
+ return 0;
+ }
+
+@@ -89,7 +86,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ *
+ * Free an IB (all asics).
+ */
+-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f)
++void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
++ struct fence *f)
+ {
+ amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+ }
+@@ -117,29 +115,37 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fen
+ */
+ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ibs, struct fence *last_vm_update,
+- struct fence **f)
++ struct amdgpu_job *job, struct fence **f)
+ {
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib *ib = &ibs[0];
+- struct amdgpu_ctx *ctx, *old_ctx;
++ bool skip_preamble, need_ctx_switch;
++ unsigned patch_offset = ~0;
+ struct amdgpu_vm *vm;
+ struct fence *hwf;
+- unsigned i, patch_offset = ~0;
++ uint64_t ctx;
+
++ unsigned i;
+ int r = 0;
+
+ if (num_ibs == 0)
+ return -EINVAL;
+
+- ctx = ibs->ctx;
+- vm = ibs->vm;
++ /* ring tests don't use a job */
++ if (job) {
++ vm = job->vm;
++ ctx = job->ctx;
++ } else {
++ vm = NULL;
++ ctx = 0;
++ }
+
+ if (!ring->ready) {
+ dev_err(adev->dev, "couldn't schedule ib\n");
+ return -EINVAL;
+ }
+
+- if (vm && !ibs->vm_id) {
++ if (vm && !job->vm_id) {
+ dev_err(adev->dev, "VM IB without ID\n");
+ return -EINVAL;
+ }
+@@ -154,54 +160,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ patch_offset = amdgpu_ring_init_cond_exec(ring);
+
+ if (vm) {
+- /* do context switch */
+- amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
+- ib->gds_base, ib->gds_size,
+- ib->gws_base, ib->gws_size,
+- ib->oa_base, ib->oa_size);
+-
+- if (ring->funcs->emit_hdp_flush)
+- amdgpu_ring_emit_hdp_flush(ring);
++ r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
++ job->gds_base, job->gds_size,
++ job->gws_base, job->gws_size,
++ job->oa_base, job->oa_size);
++ if (r) {
++ amdgpu_ring_undo(ring);
++ return r;
++ }
+ }
+
++ if (ring->funcs->emit_hdp_flush)
++ amdgpu_ring_emit_hdp_flush(ring);
++
+ /* always set cond_exec_polling to CONTINUE */
+ *ring->cond_exe_cpu_addr = 1;
+
+- old_ctx = ring->current_ctx;
++ skip_preamble = ring->current_ctx == ctx;
++ need_ctx_switch = ring->current_ctx != ctx;
+ for (i = 0; i < num_ibs; ++i) {
+ ib = &ibs[i];
+
+- if (ib->ctx != ctx || ib->vm != vm) {
+- ring->current_ctx = old_ctx;
+- if (ib->vm_id)
+- amdgpu_vm_reset_id(adev, ib->vm_id);
+- amdgpu_ring_undo(ring);
+- return -EINVAL;
+- }
+- amdgpu_ring_emit_ib(ring, ib);
+- ring->current_ctx = ctx;
+- }
++ /* drop preamble IBs if we don't have a context switch */
++ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
++ continue;
+
+- if (vm) {
+- if (ring->funcs->emit_hdp_invalidate)
+- amdgpu_ring_emit_hdp_invalidate(ring);
++ amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
++ need_ctx_switch);
++ need_ctx_switch = false;
+ }
+
++ if (ring->funcs->emit_hdp_invalidate)
++ amdgpu_ring_emit_hdp_invalidate(ring);
++
+ r = amdgpu_fence_emit(ring, &hwf);
+ if (r) {
+ dev_err(adev->dev, "failed to emit fence (%d)\n", r);
+- ring->current_ctx = old_ctx;
+- if (ib->vm_id)
+- amdgpu_vm_reset_id(adev, ib->vm_id);
++ if (job && job->vm_id)
++ amdgpu_vm_reset_id(adev, job->vm_id);
+ amdgpu_ring_undo(ring);
+ return r;
+ }
+
+ /* wrap the last IB with fence */
+- if (ib->user) {
+- uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
+- addr += ib->user->offset;
+- amdgpu_ring_emit_fence(ring, addr, ib->sequence,
++ if (job && job->uf_bo) {
++ uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
++
++ addr += job->uf_offset;
++ amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
+ AMDGPU_FENCE_FLAG_64BIT);
+ }
+
+@@ -211,6 +217,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
+ amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
++ ring->current_ctx = ctx;
+ amdgpu_ring_commit(ring);
+ return 0;
+ }
+@@ -325,7 +332,7 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data)
+
+ }
+
+-static struct drm_info_list amdgpu_debugfs_sa_list[] = {
++static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
+ {"amdgpu_sa_info", &amdgpu_debugfs_sa_info, 0, NULL},
+ };
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+index 04ded38..8d34ccd 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+@@ -25,6 +25,7 @@
+ * Alex Deucher
+ * Jerome Glisse
+ */
++#include <linux/irq.h>
+ #include <drm/drmP.h>
+ #include <drm/drm_crtc_helper.h>
+ #include <drm/amdgpu_drm.h>
+@@ -239,6 +240,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
+ INIT_WORK(&adev->hotplug_work,
+ amdgpu_hotplug_work_func);
+ }
++ adev->ddev->vblank_disable_allowed = true;
+
+ INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
+
+@@ -505,7 +507,7 @@ static int amdgpu_irqdomain_map(struct irq_domain *d,
+ return 0;
+ }
+
+-static struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
++static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
+ .map = amdgpu_irqdomain_map,
+ };
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+index a052ac2..23c8c84 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+@@ -46,7 +46,7 @@ void amdgpu_job_timeout_func(struct work_struct *work)
+ }
+
+ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+- struct amdgpu_job **job)
++ struct amdgpu_job **job, struct amdgpu_vm *vm)
+ {
+ size_t size = sizeof(struct amdgpu_job);
+
+@@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+ return -ENOMEM;
+
+ (*job)->adev = adev;
++ (*job)->vm = vm;
+ (*job)->ibs = (void *)&(*job)[1];
+ (*job)->num_ibs = num_ibs;
+ INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);
+@@ -74,7 +75,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+ {
+ int r;
+
+- r = amdgpu_job_alloc(adev, 1, job);
++ r = amdgpu_job_alloc(adev, 1, job, NULL);
+ if (r)
+ return r;
+
+@@ -96,7 +97,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
+ amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
+ fence_put(job->fence);
+
+- amdgpu_bo_unref(&job->uf.bo);
++ amdgpu_bo_unref(&job->uf_bo);
+ amdgpu_sync_free(&job->sync);
+
+ if (!job->base.use_sched)
+@@ -121,14 +122,13 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ return -EINVAL;
+
+ r = amd_sched_job_init(&job->base, &ring->sched,
+- entity,
+- amdgpu_job_timeout_func,
+- amdgpu_job_free_func,
+- owner, &fence);
++ entity, amdgpu_job_timeout_func,
++ amdgpu_job_free_func, owner, &fence);
+ if (r)
+ return r;
+
+ job->owner = owner;
++ job->ctx = entity->fence_context;
+ *f = fence_get(fence);
+ amd_sched_entity_push_job(&job->base);
+
+@@ -138,27 +138,19 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
+ {
+ struct amdgpu_job *job = to_amdgpu_job(sched_job);
+- struct amdgpu_vm *vm = job->ibs->vm;
++ struct amdgpu_vm *vm = job->vm;
+
+ struct fence *fence = amdgpu_sync_get_fence(&job->sync);
+
+- if (fence == NULL && vm && !job->ibs->vm_id) {
++ if (fence == NULL && vm && !job->vm_id) {
+ struct amdgpu_ring *ring = job->ring;
+- unsigned i, vm_id;
+- uint64_t vm_pd_addr;
+ int r;
+
+ r = amdgpu_vm_grab_id(vm, ring, &job->sync,
+ &job->base.s_fence->base,
+- &vm_id, &vm_pd_addr);
++ &job->vm_id, &job->vm_pd_addr);
+ if (r)
+ DRM_ERROR("Error getting VM ID (%d)\n", r);
+- else {
+- for (i = 0; i < job->num_ibs; ++i) {
+- job->ibs[i].vm_id = vm_id;
+- job->ibs[i].vm_pd_addr = vm_pd_addr;
+- }
+- }
+
+ fence = amdgpu_sync_get_fence(&job->sync);
+ }
+@@ -186,7 +178,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
+
+ trace_amdgpu_sched_run_job(job);
+ r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
+- job->sync.last_vm_update, &fence);
++ job->sync.last_vm_update, job, &fence);
+ if (r) {
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ goto err;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 45d3b6a..bf327c6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ fw_info.feature = adev->vce.fb_version;
+ break;
+ case AMDGPU_INFO_FW_UVD:
+- fw_info.ver = 0;
++ fw_info.ver = adev->uvd.fw_version;
+ fw_info.feature = 0;
+ break;
+ case AMDGPU_INFO_FW_GMC:
+@@ -382,8 +382,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ struct drm_amdgpu_info_vram_gtt vram_gtt;
+
+ vram_gtt.vram_size = adev->mc.real_vram_size;
++ vram_gtt.vram_size -= adev->vram_pin_size;
+ vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
+- vram_gtt.vram_cpu_accessible_size -= adev->vram_pin_size;
++ vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
+ vram_gtt.gtt_size = adev->mc.gtt_size;
+ vram_gtt.gtt_size -= adev->gart_pin_size;
+ return copy_to_user(out, &vram_gtt,
+@@ -426,7 +427,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ }
+ case AMDGPU_INFO_DEV_INFO: {
+ struct drm_amdgpu_info_device dev_info = {};
+- struct amdgpu_cu_info cu_info;
+
+ dev_info.device_id = dev->pdev->device;
+ dev_info.chip_rev = adev->rev_id;
+@@ -460,11 +460,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ AMDGPU_GPU_PAGE_SIZE;
+ dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+
+- amdgpu_asic_get_cu_info(adev, &cu_info);
+- dev_info.cu_active_number = cu_info.number;
+- dev_info.cu_ao_mask = cu_info.ao_cu_mask;
++ dev_info.cu_active_number = adev->gfx.cu_info.number;
++ dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
+ dev_info.ce_ram_size = adev->gfx.ce_ram_size;
+- memcpy(&dev_info.cu_bitmap[0], &cu_info.bitmap[0], sizeof(cu_info.bitmap));
++ memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
++ sizeof(adev->gfx.cu_info.bitmap));
+ dev_info.vram_type = adev->mc.vram_type;
+ dev_info.vram_bit_width = adev->mc.vram_width;
+ dev_info.vce_harvest_config = adev->vce.harvest_config;
+@@ -753,4 +753,4 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ };
+-int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
++const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index 151a2d4..7ecea83 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -424,9 +424,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+ bo->pin_count = 1;
+ if (gpu_addr != NULL)
+ *gpu_addr = amdgpu_bo_gpu_offset(bo);
+- if (domain == AMDGPU_GEM_DOMAIN_VRAM)
++ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ bo->adev->vram_pin_size += amdgpu_bo_size(bo);
+- else
++ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
++ bo->adev->invisible_pin_size += amdgpu_bo_size(bo);
++ } else
+ bo->adev->gart_pin_size += amdgpu_bo_size(bo);
+ } else {
+ dev_err(bo->adev->dev, "%p pin failed\n", bo);
+@@ -456,9 +458,11 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
+ }
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+ if (likely(r == 0)) {
+- if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
++ if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
+ bo->adev->vram_pin_size -= amdgpu_bo_size(bo);
+- else
++ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
++ bo->adev->invisible_pin_size -= amdgpu_bo_size(bo);
++ } else
+ bo->adev->gart_pin_size -= amdgpu_bo_size(bo);
+ } else {
+ dev_err(bo->adev->dev, "%p validate failed for unpin\n", bo);
+@@ -476,6 +480,17 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
+ return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
+ }
+
++static const char *amdgpu_vram_names[] = {
++ "UNKNOWN",
++ "GDDR1",
++ "DDR2",
++ "GDDR3",
++ "GDDR4",
++ "GDDR5",
++ "HBM",
++ "DDR3"
++};
++
+ int amdgpu_bo_init(struct amdgpu_device *adev)
+ {
+ /* Add an MTRR for the VRAM */
+@@ -484,8 +499,8 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
+ DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
+ adev->mc.mc_vram_size >> 20,
+ (unsigned long long)adev->mc.aper_size >> 20);
+- DRM_INFO("RAM width %dbits DDR\n",
+- adev->mc.vram_width);
++ DRM_INFO("RAM width %dbits %s\n",
++ adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
+ return amdgpu_ttm_init(adev);
+ }
+
+@@ -526,6 +541,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
+ if (!metadata_size) {
+ if (bo->metadata_size) {
+ kfree(bo->metadata);
++ bo->metadata = NULL;
+ bo->metadata_size = 0;
+ }
+ return 0;
+@@ -608,6 +624,10 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+ if ((offset + size) <= adev->mc.visible_vram_size)
+ return 0;
+
++ /* Can't move a pinned BO to visible VRAM */
++ if (abo->pin_count > 0)
++ return -EINVAL;
++
+ /* hurrah the memory is not visible ! */
+ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+index be6388f..7700dc2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+@@ -57,9 +57,10 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+ ttm_bo_kunmap(&bo->dma_buf_vmap);
+ }
+
+-struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+- struct dma_buf_attachment *attach,
+- struct sg_table *sg)
++struct drm_gem_object *
++amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
++ struct dma_buf_attachment *attach,
++ struct sg_table *sg)
+ {
+ struct reservation_object *resv = attach->dmabuf->resv;
+ struct amdgpu_device *adev = dev->dev_private;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+index dd79243..1b0b7ae 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+@@ -28,6 +28,7 @@
+ */
+ #include <linux/seq_file.h>
+ #include <linux/slab.h>
++#include <linux/debugfs.h>
+ #include <drm/drmP.h>
+ #include <drm/amdgpu_drm.h>
+ #include "amdgpu.h"
+@@ -46,7 +47,8 @@
+ * wptr. The GPU then starts fetching commands and executes
+ * them until the pointers are equal again.
+ */
+-static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
++static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
++ struct amdgpu_ring *ring);
+
+ /**
+ * amdgpu_ring_alloc - allocate space on the ring buffer
+@@ -215,18 +217,17 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring structure holding ring information
+- * @ring_size: size of the ring
++ * @max_dw: maximum number of dw for ring alloc
+ * @nop: nop packet for this ring
+ *
+ * Initialize the driver information for the selected ring (all asics).
+ * Returns 0 on success, error on failure.
+ */
+ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+- unsigned ring_size, u32 nop, u32 align_mask,
++ unsigned max_dw, u32 nop, u32 align_mask,
+ struct amdgpu_irq_src *irq_src, unsigned irq_type,
+ enum amdgpu_ring_type ring_type)
+ {
+- u32 rb_bufsz;
+ int r;
+
+ if (ring->adev == NULL) {
+@@ -265,7 +266,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
+ return r;
+ }
+- ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4);
++ ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4;
+ ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
+
+ r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
+@@ -283,10 +284,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ return r;
+ }
+
+- /* Align ring size */
+- rb_bufsz = order_base_2(ring_size / 8);
+- ring_size = (1 << (rb_bufsz + 1)) * 4;
+- ring->ring_size = ring_size;
++ ring->ring_size = roundup_pow_of_two(max_dw * 4 *
++ amdgpu_sched_hw_submission);
+ ring->align_mask = align_mask;
+ ring->nop = nop;
+ ring->type = ring_type;
+@@ -319,8 +318,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ }
+ }
+ ring->ptr_mask = (ring->ring_size / 4) - 1;
+- ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
+- amdgpu_sched_hw_submission);
++ ring->max_dw = max_dw;
+
+ if (amdgpu_debugfs_ring_init(adev, ring)) {
+ DRM_ERROR("Failed to register debugfs file for rings !\n");
+@@ -367,96 +365,82 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
+ */
+ #if defined(CONFIG_DEBUG_FS)
+
+-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
++/* Layout of file is 12 bytes consisting of
++ * - rptr
++ * - wptr
++ * - driver's copy of wptr
++ *
++ * followed by n-words of ring data
++ */
++static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
++ size_t size, loff_t *pos)
+ {
+- struct drm_info_node *node = (struct drm_info_node *) m->private;
+- struct drm_device *dev = node->minor->dev;
+- struct amdgpu_device *adev = dev->dev_private;
+- int roffset = *(int*)node->info_ent->data;
+- struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
+-
+- uint32_t rptr, wptr, rptr_next;
+- unsigned i;
+-
+- wptr = amdgpu_ring_get_wptr(ring);
+- seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
+-
+- rptr = amdgpu_ring_get_rptr(ring);
+- rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
+-
+- seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
+-
+- seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
+- ring->wptr, ring->wptr);
+-
+- if (!ring->ready)
+- return 0;
+-
+- /* print 8 dw before current rptr as often it's the last executed
+- * packet that is the root issue
+- */
+- i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
+- while (i != rptr) {
+- seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
+- if (i == rptr)
+- seq_puts(m, " *");
+- if (i == rptr_next)
+- seq_puts(m, " #");
+- seq_puts(m, "\n");
+- i = (i + 1) & ring->ptr_mask;
++ struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private;
++ int r, i;
++ uint32_t value, result, early[3];
++
++ if (*pos & 3 || size & 3)
++ return -EINVAL;
++
++ result = 0;
++
++ if (*pos < 12) {
++ early[0] = amdgpu_ring_get_rptr(ring);
++ early[1] = amdgpu_ring_get_wptr(ring);
++ early[2] = ring->wptr;
++ for (i = *pos / 4; i < 3 && size; i++) {
++ r = put_user(early[i], (uint32_t *)buf);
++ if (r)
++ return r;
++ buf += 4;
++ result += 4;
++ size -= 4;
++ *pos += 4;
++ }
+ }
+- while (i != wptr) {
+- seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
+- if (i == rptr)
+- seq_puts(m, " *");
+- if (i == rptr_next)
+- seq_puts(m, " #");
+- seq_puts(m, "\n");
+- i = (i + 1) & ring->ptr_mask;
++
++ while (size) {
++ if (*pos >= (ring->ring_size + 12))
++ return result;
++
++ value = ring->ring[(*pos - 12)/4];
++ r = put_user(value, (uint32_t*)buf);
++ if (r)
++ return r;
++ buf += 4;
++ result += 4;
++ size -= 4;
++ *pos += 4;
+ }
+- return 0;
++
++ return result;
+ }
+
+-/* TODO: clean this up !*/
+-static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]);
+-static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]);
+-static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]);
+-static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma.instance[0].ring);
+-static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma.instance[1].ring);
+-static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring);
+-static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]);
+-static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]);
+-
+-static struct drm_info_list amdgpu_debugfs_ring_info_list[] = {
+- {"amdgpu_ring_gfx", amdgpu_debugfs_ring_info, 0, &amdgpu_gfx_index},
+- {"amdgpu_ring_cp1", amdgpu_debugfs_ring_info, 0, &cayman_cp1_index},
+- {"amdgpu_ring_cp2", amdgpu_debugfs_ring_info, 0, &cayman_cp2_index},
+- {"amdgpu_ring_dma1", amdgpu_debugfs_ring_info, 0, &amdgpu_dma1_index},
+- {"amdgpu_ring_dma2", amdgpu_debugfs_ring_info, 0, &amdgpu_dma2_index},
+- {"amdgpu_ring_uvd", amdgpu_debugfs_ring_info, 0, &r600_uvd_index},
+- {"amdgpu_ring_vce1", amdgpu_debugfs_ring_info, 0, &si_vce1_index},
+- {"amdgpu_ring_vce2", amdgpu_debugfs_ring_info, 0, &si_vce2_index},
++static const struct file_operations amdgpu_debugfs_ring_fops = {
++ .owner = THIS_MODULE,
++ .read = amdgpu_debugfs_ring_read,
++ .llseek = default_llseek
+ };
+
+ #endif
+
+-static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
++static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
++ struct amdgpu_ring *ring)
+ {
+ #if defined(CONFIG_DEBUG_FS)
+- unsigned i;
+- for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
+- struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i];
+- int roffset = *(int*)amdgpu_debugfs_ring_info_list[i].data;
+- struct amdgpu_ring *other = (void *)(((uint8_t*)adev) + roffset);
+- unsigned r;
++ struct drm_minor *minor = adev->ddev->primary;
++ struct dentry *ent, *root = minor->debugfs_root;
++ char name[32];
+
+- if (other != ring)
+- continue;
++ sprintf(name, "amdgpu_ring_%s", ring->name);
+
+- r = amdgpu_debugfs_add_files(adev, info, 1);
+- if (r)
+- return r;
+- }
++ ent = debugfs_create_file(name,
++ S_IFREG | S_IRUGO, root,
++ ring, &amdgpu_debugfs_ring_fops);
++ if (IS_ERR(ent))
++ return PTR_ERR(ent);
++
++ i_size_write(ent->d_inode, ring->ring_size + 12);
+ #endif
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+index c48b4fc..34a9280 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+@@ -109,6 +109,29 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
+ }
+
+ /**
++ * amdgpu_sync_add_later - add the fence to the hash
++ *
++ * @sync: sync object to add the fence to
++ * @f: fence to add
++ *
++ * Tries to add the fence to an existing hash entry. Returns true when an entry
++ * was found, false otherwise.
++ */
++static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f)
++{
++ struct amdgpu_sync_entry *e;
++
++ hash_for_each_possible(sync->fences, e, node, f->context) {
++ if (unlikely(e->fence->context != f->context))
++ continue;
++
++ amdgpu_sync_keep_later(&e->fence, f);
++ return true;
++ }
++ return false;
++}
++
++/**
+ * amdgpu_sync_fence - remember to sync to this fence
+ *
+ * @sync: sync object to add fence to
+@@ -127,13 +150,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+ amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
+ amdgpu_sync_keep_later(&sync->last_vm_update, f);
+
+- hash_for_each_possible(sync->fences, e, node, f->context) {
+- if (unlikely(e->fence->context != f->context))
+- continue;
+-
+- amdgpu_sync_keep_later(&e->fence, f);
++ if (amdgpu_sync_add_later(sync, f))
+ return 0;
+- }
+
+ e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ if (!e)
+@@ -204,6 +222,81 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
+ return r;
+ }
+
++/**
++ * amdgpu_sync_is_idle - test if all fences are signaled
++ *
++ * @sync: the sync object
++ *
++ * Returns true if all fences in the sync object are signaled.
++ */
++bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
++{
++ struct amdgpu_sync_entry *e;
++ struct hlist_node *tmp;
++ int i;
++
++ hash_for_each_safe(sync->fences, i, tmp, e, node) {
++ struct fence *f = e->fence;
++
++ if (fence_is_signaled(f)) {
++ hash_del(&e->node);
++ fence_put(f);
++ kmem_cache_free(amdgpu_sync_slab, e);
++ continue;
++ }
++
++ return false;
++ }
++
++ return true;
++}
++
++/**
++ * amdgpu_sync_cycle_fences - move fences from one sync object into another
++ *
++ * @dst: the destination sync object
++ * @src: the source sync object
++ * @fence: fence to add to source
++ *
++ * Remove all fences from source and put them into destination and add
++ * fence as new one into source.
++ */
++int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
++ struct fence *fence)
++{
++ struct amdgpu_sync_entry *e, *newone;
++ struct hlist_node *tmp;
++ int i;
++
++ /* Allocate the new entry before moving the old ones */
++ newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
++ if (!newone)
++ return -ENOMEM;
++
++ hash_for_each_safe(src->fences, i, tmp, e, node) {
++ struct fence *f = e->fence;
++
++ hash_del(&e->node);
++ if (fence_is_signaled(f)) {
++ fence_put(f);
++ kmem_cache_free(amdgpu_sync_slab, e);
++ continue;
++ }
++
++ if (amdgpu_sync_add_later(dst, f)) {
++ kmem_cache_free(amdgpu_sync_slab, e);
++ continue;
++ }
++
++ hash_add(dst->fences, &e->node, f->context);
++ }
++
++ hash_add(src->fences, &newone->node, fence->context);
++ newone->fence = fence_get(fence);
++
++ return 0;
++}
++
+ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+ {
+ struct amdgpu_sync_entry *e;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 0f42b1a..3390282 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -223,6 +223,8 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
+ {
+ struct amdgpu_bo *rbo = container_of(bo, struct amdgpu_bo, tbo);
+
++ if (amdgpu_ttm_tt_get_usermm(bo->ttm))
++ return -EPERM;
+ return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
+ }
+
+@@ -384,9 +386,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
+ struct ttm_mem_reg *new_mem)
+ {
+ struct amdgpu_device *adev;
++ struct amdgpu_bo *abo;
+ struct ttm_mem_reg *old_mem = &bo->mem;
+ int r;
+
++ /* Can't move a pinned BO */
++ abo = container_of(bo, struct amdgpu_bo, tbo);
++ if (WARN_ON_ONCE(abo->pin_count > 0))
++ return -EINVAL;
++
+ adev = amdgpu_get_adev(bo->bdev);
+ if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
+ amdgpu_move_null(bo, new_mem);
+@@ -921,6 +929,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
+
+ int amdgpu_ttm_init(struct amdgpu_device *adev)
+ {
++ unsigned i, j;
+ int r;
+
+ r = amdgpu_ttm_global_init(adev);
+@@ -938,6 +947,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
+ DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ return r;
+ }
++
++ for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
++ struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
++
++ for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
++ lru->lru[j] = &adev->mman.bdev.man[j].lru;
++ lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
++ }
++
+ adev->mman.initialized = true;
+ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
+ adev->mc.real_vram_size >> PAGE_SHIFT);
+@@ -1160,7 +1178,7 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
+ static int ttm_pl_vram = TTM_PL_VRAM;
+ static int ttm_pl_tt = TTM_PL_TT;
+
+-static struct drm_info_list amdgpu_ttm_debugfs_list[] = {
++static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
+ {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
+ {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
+ {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
+@@ -1211,6 +1229,8 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
+ .llseek = default_llseek
+ };
+
++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
++
+ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+ {
+@@ -1258,6 +1278,8 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
+
+ #endif
+
++#endif
++
+ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+ {
+ #if defined(CONFIG_DEBUG_FS)
+@@ -1273,6 +1295,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+ i_size_write(ent->d_inode, adev->mc.mc_vram_size);
+ adev->mman.vram = ent;
+
++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+ ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root,
+ adev, &amdgpu_ttm_gtt_fops);
+ if (IS_ERR(ent))
+@@ -1280,6 +1303,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+ i_size_write(ent->d_inode, adev->mc.gtt_size);
+ adev->mman.gtt = ent;
+
++#endif
+ count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
+
+ #ifdef CONFIG_SWIOTLB
+@@ -1301,7 +1325,10 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
+ debugfs_remove(adev->mman.vram);
+ adev->mman.vram = NULL;
+
++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+ debugfs_remove(adev->mman.gtt);
+ adev->mman.gtt = NULL;
+ #endif
++
++#endif
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+index 917145b..16d58b8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+@@ -41,19 +41,23 @@
+
+ /* 1 second timeout */
+ #define UVD_IDLE_TIMEOUT_MS 1000
++/* Minimum UVD firmware version required on Polaris10/11 */
++#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
+ /* Firmware Names */
+ #ifdef CONFIG_DRM_AMDGPU_CIK
+ #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin"
+-#define FIRMWARE_KABINI "radeon/kabini_uvd.bin"
+-#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin"
+-#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin"
++#define FIRMWARE_KABINI "radeon/kabini_uvd.bin"
++#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin"
++#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin"
+ #define FIRMWARE_MULLINS "radeon/mullins_uvd.bin"
+ #endif
+ #define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
+ #define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
+ #define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin"
+ #define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin"
++#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
++#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
+
+ /**
+ * amdgpu_uvd_cs_ctx - Command submission parser context
+@@ -85,6 +89,8 @@ MODULE_FIRMWARE(FIRMWARE_TONGA);
+ MODULE_FIRMWARE(FIRMWARE_CARRIZO);
+ MODULE_FIRMWARE(FIRMWARE_FIJI);
+ MODULE_FIRMWARE(FIRMWARE_STONEY);
++MODULE_FIRMWARE(FIRMWARE_POLARIS10);
++MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+
+ static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
+ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
+@@ -131,6 +137,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+ case CHIP_STONEY:
+ fw_name = FIRMWARE_STONEY;
+ break;
++ case CHIP_POLARIS10:
++ fw_name = FIRMWARE_POLARIS10;
++ break;
++ case CHIP_POLARIS11:
++ fw_name = FIRMWARE_POLARIS11;
++ break;
+ default:
+ return -EINVAL;
+ }
+@@ -161,6 +173,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+ DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
+ version_major, version_minor, family_id);
+
++ adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
++ (family_id << 8));
++
++ if ((adev->asic_type == CHIP_POLARIS10 ||
++ adev->asic_type == CHIP_POLARIS11) &&
++ (adev->uvd.fw_version < FW_1_66_16))
++ DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
++ version_major, version_minor);
++
+ /*
+ * Limit the number of UVD handles depending on microcode major
+ * and minor versions. The firmware version which has 40 UVD
+@@ -255,32 +276,30 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
+
+ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+ {
+- struct amdgpu_ring *ring = &adev->uvd.ring;
+- int i, r;
++ unsigned size;
++ void *ptr;
++ int i;
+
+ if (adev->uvd.vcpu_bo == NULL)
+ return 0;
+
+- for (i = 0; i < adev->uvd.max_handles; ++i) {
+- uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+- if (handle != 0) {
+- struct fence *fence;
++ for (i = 0; i < adev->uvd.max_handles; ++i)
++ if (atomic_read(&adev->uvd.handles[i]))
++ break;
+
+- amdgpu_uvd_note_usage(adev);
++ if (i == adev->uvd.max_handles)
++ return 0; /* scan hit its bound: no handle in use, nothing to save */
+
+- r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);
+- if (r) {
+- DRM_ERROR("Error destroying UVD (%d)!\n", r);
+- continue;
+- }
++ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+- fence_wait(fence, false);
+- fence_put(fence);
++ size = amdgpu_bo_size(adev->uvd.vcpu_bo);
++ ptr = adev->uvd.cpu_addr;
+
+- adev->uvd.filp[i] = NULL;
+- atomic_set(&adev->uvd.handles[i], 0);
+- }
+- }
++ adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
++ if (!adev->uvd.saved_bo)
++ return -ENOMEM;
++
++ memcpy(adev->uvd.saved_bo, ptr, size);
+
+ return 0;
+ }
+@@ -289,23 +308,29 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
+ {
+ unsigned size;
+ void *ptr;
+- const struct common_firmware_header *hdr;
+- unsigned offset;
+
+ if (adev->uvd.vcpu_bo == NULL)
+ return -EINVAL;
+
+- hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+- memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
+- (adev->uvd.fw->size) - offset);
+-
+ size = amdgpu_bo_size(adev->uvd.vcpu_bo);
+- size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr = adev->uvd.cpu_addr;
+- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+
+- memset(ptr, 0, size);
++ if (adev->uvd.saved_bo != NULL) {
++ memcpy(ptr, adev->uvd.saved_bo, size);
++ kfree(adev->uvd.saved_bo);
++ adev->uvd.saved_bo = NULL;
++ } else {
++ const struct common_firmware_header *hdr;
++ unsigned offset;
++
++ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
++ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
++ memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
++ (adev->uvd.fw->size) - offset);
++ size -= le32_to_cpu(hdr->ucode_size_bytes);
++ ptr += le32_to_cpu(hdr->ucode_size_bytes);
++ memset(ptr, 0, size);
++ }
+
+ return 0;
+ }
+@@ -397,7 +422,8 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
+ *
+ * Peek into the decode message and calculate the necessary buffer sizes.
+ */
+-static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
++static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
++ unsigned buf_sizes[])
+ {
+ unsigned stream_type = msg[4];
+ unsigned width = msg[6];
+@@ -419,7 +445,6 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
+
+ switch (stream_type) {
+ case 0: /* H264 */
+- case 7: /* H264 Perf */
+ switch(level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+@@ -497,6 +522,54 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
+ min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
+ break;
+
++ case 7: /* H264 Perf */
++ switch(level) {
++ case 30:
++ num_dpb_buffer = 8100 / fs_in_mb;
++ break;
++ case 31:
++ num_dpb_buffer = 18000 / fs_in_mb;
++ break;
++ case 32:
++ num_dpb_buffer = 20480 / fs_in_mb;
++ break;
++ case 41:
++ num_dpb_buffer = 32768 / fs_in_mb;
++ break;
++ case 42:
++ num_dpb_buffer = 34816 / fs_in_mb;
++ break;
++ case 50:
++ num_dpb_buffer = 110400 / fs_in_mb;
++ break;
++ case 51:
++ num_dpb_buffer = 184320 / fs_in_mb;
++ break;
++ default:
++ num_dpb_buffer = 184320 / fs_in_mb;
++ break;
++ }
++ num_dpb_buffer++;
++ if (num_dpb_buffer > 17)
++ num_dpb_buffer = 17;
++
++ /* reference picture buffer */
++ min_dpb_size = image_size * num_dpb_buffer;
++
++ if (adev->asic_type < CHIP_POLARIS10){
++ /* macroblock context buffer */
++ min_dpb_size +=
++ width_in_mb * height_in_mb * num_dpb_buffer * 192;
++
++ /* IT surface buffer */
++ min_dpb_size += width_in_mb * height_in_mb * 32;
++ } else {
++ /* macroblock context buffer */
++ min_ctx_size =
++ width_in_mb * height_in_mb * num_dpb_buffer * 192;
++ }
++ break;
++
+ case 16: /* H265 */
+ image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
+ image_size = ALIGN(image_size, 256);
+@@ -592,7 +665,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+
+ case 1:
+ /* it's a decode msg, calc buffer sizes */
+- r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes);
++ r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
+ amdgpu_bo_kunmap(bo);
+ if (r)
+ return r;
+@@ -613,7 +686,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+
+ case 2:
+ /* it's a destroy msg, free the handle */
+- for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
++ for (i = 0; i < adev->uvd.max_handles; ++i)
+ atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
+ amdgpu_bo_kunmap(bo);
+ return 0;
+@@ -893,7 +966,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+ ib->length_dw = 16;
+
+ if (direct) {
+- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
+ job->fence = f;
+ if (r)
+ goto err_free;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+index 4bec0c1..875626a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+@@ -41,15 +41,17 @@
+ /* Firmware Names */
+ #ifdef CONFIG_DRM_AMDGPU_CIK
+ #define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin"
+-#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
+-#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin"
+-#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin"
++#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
++#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin"
++#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin"
+ #define FIRMWARE_MULLINS "radeon/mullins_vce.bin"
+ #endif
+ #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
+ #define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
+ #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
+ #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
++#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
++#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
+
+ #ifdef CONFIG_DRM_AMDGPU_CIK
+ MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+@@ -62,6 +64,8 @@ MODULE_FIRMWARE(FIRMWARE_TONGA);
+ MODULE_FIRMWARE(FIRMWARE_CARRIZO);
+ MODULE_FIRMWARE(FIRMWARE_FIJI);
+ MODULE_FIRMWARE(FIRMWARE_STONEY);
++MODULE_FIRMWARE(FIRMWARE_POLARIS10);
++MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+
+ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
+
+@@ -113,6 +117,12 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+ case CHIP_STONEY:
+ fw_name = FIRMWARE_STONEY;
+ break;
++ case CHIP_POLARIS10:
++ fw_name = FIRMWARE_POLARIS10;
++ break;
++ case CHIP_POLARIS11:
++ fw_name = FIRMWARE_POLARIS11;
++ break;
+
+ default:
+ return -EINVAL;
+@@ -234,6 +244,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
+ if (i == AMDGPU_MAX_VCE_HANDLES)
+ return 0;
+
++ cancel_delayed_work_sync(&adev->vce.idle_work);
+ /* TODO: suspending running encoding sessions isn't supported */
+ return -EINVAL;
+ }
+@@ -425,7 +436,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
+ job->fence = f;
+ if (r)
+ goto err;
+@@ -487,7 +498,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ ib->ptr[i] = 0x0;
+
+ if (direct) {
+- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
+ job->fence = f;
+ if (r)
+ goto err;
+@@ -751,7 +762,8 @@ out:
+ * @ib: the IB to execute
+ *
+ */
+-void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
++void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+ amdgpu_ring_write(ring, VCE_CMD_IB);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+index ef99d23..f40cf76 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+@@ -34,7 +34,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct fence **fence);
+ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
+ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+-void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
++void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch);
+ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index 75154ac..9f36ed3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -53,6 +53,18 @@
+ /* Special value that no flush is necessary */
+ #define AMDGPU_VM_NO_FLUSH (~0ll)
+
++/* Local structure. Encapsulate some VM table update parameters to reduce
++ * the number of function parameters
++ */
++struct amdgpu_vm_update_params {
++ /* address where to copy page table entries from */
++ uint64_t src;
++ /* DMA addresses to use for mapping */
++ dma_addr_t *pages_addr;
++ /* indirect buffer to fill with commands */
++ struct amdgpu_ib *ib;
++};
++
+ /**
+ * amdgpu_vm_num_pde - return the number of page directory entries
+ *
+@@ -166,88 +178,109 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ {
+ uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+ struct amdgpu_device *adev = ring->adev;
+- struct amdgpu_vm_id *id = &vm->ids[ring->idx];
+ struct fence *updates = sync->last_vm_update;
++ struct amdgpu_vm_id *id;
++ unsigned i = ring->idx;
+ int r;
+
+ mutex_lock(&adev->vm_manager.lock);
+
+- /* check if the id is still valid */
+- if (id->mgr_id) {
+- struct fence *flushed = id->flushed_updates;
+- bool is_later;
+- long owner;
++ /* Check if we can use a VMID already assigned to this VM */
++ do {
++ struct fence *flushed;
+
+- if (!flushed)
+- is_later = true;
+- else if (!updates)
+- is_later = false;
+- else
+- is_later = fence_is_later(updates, flushed);
++ id = vm->ids[i++];
++ if (i == AMDGPU_MAX_RINGS)
++ i = 0;
+
+- owner = atomic_long_read(&id->mgr_id->owner);
+- if (!is_later && owner == (long)id &&
+- pd_addr == id->pd_gpu_addr) {
++ /* Check all the prerequisites to using this VMID */
++ if (!id)
++ continue;
+
+- r = amdgpu_sync_fence(ring->adev, sync,
+- id->mgr_id->active);
+- if (r) {
+- mutex_unlock(&adev->vm_manager.lock);
+- return r;
+- }
++ if (atomic64_read(&id->owner) != vm->client_id)
++ continue;
+
+- fence_put(id->mgr_id->active);
+- id->mgr_id->active = fence_get(fence);
++ if (pd_addr != id->pd_gpu_addr)
++ continue;
+
+- list_move_tail(&id->mgr_id->list,
+- &adev->vm_manager.ids_lru);
++ if (id->last_user != ring &&
++ (!id->last_flush || !fence_is_signaled(id->last_flush)))
++ continue;
+
+- *vm_id = id->mgr_id - adev->vm_manager.ids;
+- *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+- trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
+- *vm_pd_addr);
++ flushed = id->flushed_updates;
++ if (updates && (!flushed || fence_is_later(updates, flushed)))
++ continue;
+
+- mutex_unlock(&adev->vm_manager.lock);
+- return 0;
++ /* Good we can use this VMID */
++ if (id->last_user == ring) {
++ r = amdgpu_sync_fence(ring->adev, sync,
++ id->first);
++ if (r)
++ goto error;
+ }
+- }
+
+- id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+- struct amdgpu_vm_manager_id,
+- list);
++ /* And remember this submission as user of the VMID */
++ r = amdgpu_sync_fence(ring->adev, &id->active, fence);
++ if (r)
++ goto error;
+
+- if (id->mgr_id->active && !fence_is_signaled(id->mgr_id->active)) {
+- struct amdgpu_vm_manager_id *mgr_id, *tmp;
++ list_move_tail(&id->list, &adev->vm_manager.ids_lru);
++ vm->ids[ring->idx] = id;
++
++ *vm_id = id - adev->vm_manager.ids;
++ *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
++ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
++
++ mutex_unlock(&adev->vm_manager.lock);
++ return 0;
++
++ } while (i != ring->idx);
++
++ id = list_first_entry(&adev->vm_manager.ids_lru,
++ struct amdgpu_vm_id,
++ list);
++
++ if (!amdgpu_sync_is_idle(&id->active)) {
+ struct list_head *head = &adev->vm_manager.ids_lru;
+- list_for_each_entry_safe(mgr_id, tmp, &adev->vm_manager.ids_lru, list) {
+- if (mgr_id->active && fence_is_signaled(mgr_id->active)) {
+- list_move(&mgr_id->list, head);
+- head = &mgr_id->list;
++ struct amdgpu_vm_id *tmp;
++
++ list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
++ list) {
++ if (amdgpu_sync_is_idle(&id->active)) {
++ list_move(&id->list, head);
++ head = &id->list;
+ }
+ }
+- id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+- struct amdgpu_vm_manager_id,
+- list);
++ id = list_first_entry(&adev->vm_manager.ids_lru,
++ struct amdgpu_vm_id,
++ list);
+ }
+
+- r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
+- if (!r) {
+- fence_put(id->mgr_id->active);
+- id->mgr_id->active = fence_get(fence);
++ r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
++ if (r)
++ goto error;
++
++ fence_put(id->first);
++ id->first = fence_get(fence);
+
+- fence_put(id->flushed_updates);
+- id->flushed_updates = fence_get(updates);
++ fence_put(id->last_flush);
++ id->last_flush = NULL;
+
+- id->pd_gpu_addr = pd_addr;
++ fence_put(id->flushed_updates);
++ id->flushed_updates = fence_get(updates);
+
+- list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
+- atomic_long_set(&id->mgr_id->owner, (long)id);
++ id->pd_gpu_addr = pd_addr;
+
+- *vm_id = id->mgr_id - adev->vm_manager.ids;
+- *vm_pd_addr = pd_addr;
+- trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+- }
++ list_move_tail(&id->list, &adev->vm_manager.ids_lru);
++ id->last_user = ring;
++ atomic64_set(&id->owner, vm->client_id);
++ vm->ids[ring->idx] = id;
+
++ *vm_id = id - adev->vm_manager.ids;
++ *vm_pd_addr = pd_addr;
++ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
++
++error:
+ mutex_unlock(&adev->vm_manager.lock);
+ return r;
+ }
+@@ -261,43 +294,62 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ *
+ * Emit a VM flush when it is necessary.
+ */
+-void amdgpu_vm_flush(struct amdgpu_ring *ring,
+- unsigned vm_id, uint64_t pd_addr,
+- uint32_t gds_base, uint32_t gds_size,
+- uint32_t gws_base, uint32_t gws_size,
+- uint32_t oa_base, uint32_t oa_size)
++int amdgpu_vm_flush(struct amdgpu_ring *ring,
++ unsigned vm_id, uint64_t pd_addr,
++ uint32_t gds_base, uint32_t gds_size,
++ uint32_t gws_base, uint32_t gws_size,
++ uint32_t oa_base, uint32_t oa_size)
+ {
+ struct amdgpu_device *adev = ring->adev;
+- struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
++ struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+ bool gds_switch_needed = ring->funcs->emit_gds_switch && (
+- mgr_id->gds_base != gds_base ||
+- mgr_id->gds_size != gds_size ||
+- mgr_id->gws_base != gws_base ||
+- mgr_id->gws_size != gws_size ||
+- mgr_id->oa_base != oa_base ||
+- mgr_id->oa_size != oa_size);
++ id->gds_base != gds_base ||
++ id->gds_size != gds_size ||
++ id->gws_base != gws_base ||
++ id->gws_size != gws_size ||
++ id->oa_base != oa_base ||
++ id->oa_size != oa_size);
++ int r;
+
+ if (ring->funcs->emit_pipeline_sync && (
+- pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
++ pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
++ ring->type == AMDGPU_RING_TYPE_COMPUTE))
+ amdgpu_ring_emit_pipeline_sync(ring);
+
+- if (pd_addr != AMDGPU_VM_NO_FLUSH) {
++ if (ring->funcs->emit_vm_flush &&
++ pd_addr != AMDGPU_VM_NO_FLUSH) {
++ struct fence *fence;
++
+ trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
+ amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
++
++ mutex_lock(&adev->vm_manager.lock);
++ if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
++ r = amdgpu_fence_emit(ring, &fence);
++ if (r) {
++ mutex_unlock(&adev->vm_manager.lock);
++ return r;
++ }
++ fence_put(id->last_flush);
++ id->last_flush = fence;
++ }
++ mutex_unlock(&adev->vm_manager.lock);
+ }
+
+ if (gds_switch_needed) {
+- mgr_id->gds_base = gds_base;
+- mgr_id->gds_size = gds_size;
+- mgr_id->gws_base = gws_base;
+- mgr_id->gws_size = gws_size;
+- mgr_id->oa_base = oa_base;
+- mgr_id->oa_size = oa_size;
++ id->gds_base = gds_base;
++ id->gds_size = gds_size;
++ id->gws_base = gws_base;
++ id->gws_size = gws_size;
++ id->oa_base = oa_base;
++ id->oa_size = oa_size;
+ amdgpu_ring_emit_gds_switch(ring, vm_id,
+ gds_base, gds_size,
+ gws_base, gws_size,
+ oa_base, oa_size);
+ }
++
++ return 0;
+ }
+
+ /**
+@@ -310,14 +362,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
+ */
+ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
+ {
+- struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+-
+- mgr_id->gds_base = 0;
+- mgr_id->gds_size = 0;
+- mgr_id->gws_base = 0;
+- mgr_id->gws_size = 0;
+- mgr_id->oa_base = 0;
+- mgr_id->oa_size = 0;
++ struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
++
++ id->gds_base = 0;
++ id->gds_size = 0;
++ id->gws_base = 0;
++ id->gws_size = 0;
++ id->oa_base = 0;
++ id->oa_size = 0;
+ }
+
+ /**
+@@ -349,9 +401,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ * amdgpu_vm_update_pages - helper to call the right asic function
+ *
+ * @adev: amdgpu_device pointer
+- * @gtt: GART instance to use for mapping
+- * @gtt_flags: GTT hw access flags
+- * @ib: indirect buffer to fill with commands
++ * @vm_update_params: see amdgpu_vm_update_params definition
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+@@ -362,30 +412,29 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ * to setup the page table using the DMA.
+ */
+ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
+- struct amdgpu_gart *gtt,
+- uint32_t gtt_flags,
+- struct amdgpu_ib *ib,
++ struct amdgpu_vm_update_params
++ *vm_update_params,
+ uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr,
+ uint32_t flags)
+ {
+ trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
+
+- if ((gtt == &adev->gart) && (flags == gtt_flags)) {
+- uint64_t src = gtt->table_addr + (addr >> 12) * 8;
+- amdgpu_vm_copy_pte(adev, ib, pe, src, count);
++ if (vm_update_params->src) {
++ amdgpu_vm_copy_pte(adev, vm_update_params->ib,
++ pe, (vm_update_params->src + (addr >> 12) * 8), count);
+
+- } else if (gtt) {
+- dma_addr_t *pages_addr = gtt->pages_addr;
+- amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
+- count, incr, flags);
++ } else if (vm_update_params->pages_addr) {
++ amdgpu_vm_write_pte(adev, vm_update_params->ib,
++ vm_update_params->pages_addr,
++ pe, addr, count, incr, flags);
+
+ } else if (count < 3) {
+- amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
++ amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
+ count, incr, flags);
+
+ } else {
+- amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
++ amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
+ count, incr, flags);
+ }
+ }
+@@ -405,10 +454,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring;
+ struct fence *fence = NULL;
+ struct amdgpu_job *job;
++ struct amdgpu_vm_update_params vm_update_params;
+ unsigned entries;
+ uint64_t addr;
+ int r;
+
++ memset(&vm_update_params, 0, sizeof(vm_update_params));
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+ r = reservation_object_reserve_shared(bo->tbo.resv);
+@@ -426,7 +477,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+ if (r)
+ goto error;
+
+- amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
++ vm_update_params.ib = &job->ibs[0];
++ amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
+ 0, 0);
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+@@ -499,11 +551,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ uint64_t last_pde = ~0, last_pt = ~0;
+ unsigned count = 0, pt_idx, ndw;
+ struct amdgpu_job *job;
+- struct amdgpu_ib *ib;
++ struct amdgpu_vm_update_params vm_update_params;
+ struct fence *fence = NULL;
+
+ int r;
+
++ memset(&vm_update_params, 0, sizeof(vm_update_params));
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+ /* padding, etc. */
+@@ -516,7 +569,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ if (r)
+ return r;
+
+- ib = &job->ibs[0];
++ vm_update_params.ib = &job->ibs[0];
+
+ /* walk over the address space and update the page directory */
+ for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
+@@ -536,7 +589,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ ((last_pt + incr * count) != pt)) {
+
+ if (count) {
+- amdgpu_vm_update_pages(adev, NULL, 0, ib,
++ amdgpu_vm_update_pages(adev, &vm_update_params,
+ last_pde, last_pt,
+ count, incr,
+ AMDGPU_PTE_VALID);
+@@ -551,14 +604,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ }
+
+ if (count)
+- amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
+- count, incr, AMDGPU_PTE_VALID);
++ amdgpu_vm_update_pages(adev, &vm_update_params,
++ last_pde, last_pt,
++ count, incr, AMDGPU_PTE_VALID);
+
+- if (ib->length_dw != 0) {
+- amdgpu_ring_pad_ib(ring, ib);
++ if (vm_update_params.ib->length_dw != 0) {
++ amdgpu_ring_pad_ib(ring, vm_update_params.ib);
+ amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
+ AMDGPU_FENCE_OWNER_VM);
+- WARN_ON(ib->length_dw > ndw);
++ WARN_ON(vm_update_params.ib->length_dw > ndw);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &fence);
+ if (r)
+@@ -584,18 +638,15 @@ error_free:
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @adev: amdgpu_device pointer
+- * @gtt: GART instance to use for mapping
+- * @gtt_flags: GTT hw mapping flags
+- * @ib: IB for the update
++ * @vm_update_params: see amdgpu_vm_update_params definition
+ * @pe_start: first PTE to handle
+ * @pe_end: last PTE to handle
+ * @addr: addr those PTEs should point to
+ * @flags: hw mapping flags
+ */
+ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+- struct amdgpu_gart *gtt,
+- uint32_t gtt_flags,
+- struct amdgpu_ib *ib,
++ struct amdgpu_vm_update_params
++ *vm_update_params,
+ uint64_t pe_start, uint64_t pe_end,
+ uint64_t addr, uint32_t flags)
+ {
+@@ -632,10 +683,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+ return;
+
+ /* system pages are non continuously */
+- if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
++ if (vm_update_params->src || vm_update_params->pages_addr ||
++ !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
+
+ count = (pe_end - pe_start) / 8;
+- amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
++ amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
+ addr, count, AMDGPU_GPU_PAGE_SIZE,
+ flags);
+ return;
+@@ -644,21 +696,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+ /* handle the 4K area at the beginning */
+ if (pe_start != frag_start) {
+ count = (frag_start - pe_start) / 8;
+- amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
++ amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
+ count, AMDGPU_GPU_PAGE_SIZE, flags);
+ addr += AMDGPU_GPU_PAGE_SIZE * count;
+ }
+
+ /* handle the area in the middle */
+ count = (frag_end - frag_start) / 8;
+- amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
++ amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
+ AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
+
+ /* handle the 4K area at the end */
+ if (frag_end != pe_end) {
+ addr += AMDGPU_GPU_PAGE_SIZE * count;
+ count = (pe_end - frag_end) / 8;
+- amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
++ amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
+ count, AMDGPU_GPU_PAGE_SIZE, flags);
+ }
+ }
+@@ -667,8 +719,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+ * amdgpu_vm_update_ptes - make sure that page tables are valid
+ *
+ * @adev: amdgpu_device pointer
+- * @gtt: GART instance to use for mapping
+- * @gtt_flags: GTT hw mapping flags
++ * @vm_update_params: see amdgpu_vm_update_params definition
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+@@ -678,10 +729,9 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+ * Update the page tables in the range @start - @end.
+ */
+ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+- struct amdgpu_gart *gtt,
+- uint32_t gtt_flags,
++ struct amdgpu_vm_update_params
++ *vm_update_params,
+ struct amdgpu_vm *vm,
+- struct amdgpu_ib *ib,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint32_t flags)
+ {
+@@ -707,7 +757,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+
+ if (last_pe_end != pe_start) {
+
+- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
++ amdgpu_vm_frag_ptes(adev, vm_update_params,
+ last_pe_start, last_pe_end,
+ last_dst, flags);
+
+@@ -722,17 +772,16 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+ dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+ }
+
+- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+- last_pe_start, last_pe_end,
+- last_dst, flags);
++ amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
++ last_pe_end, last_dst, flags);
+ }
+
+ /**
+ * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
+ *
+ * @adev: amdgpu_device pointer
+- * @gtt: GART instance to use for mapping
+- * @gtt_flags: flags as they are used for GTT
++ * @src: address where to copy page table entries from
++ * @pages_addr: DMA addresses to use for mapping
+ * @vm: requested vm
+ * @start: start of mapped range
+ * @last: last mapped entry
+@@ -744,8 +793,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+ * Returns 0 for success, -EINVAL for failure.
+ */
+ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+- struct amdgpu_gart *gtt,
+- uint32_t gtt_flags,
++ uint64_t src,
++ dma_addr_t *pages_addr,
+ struct amdgpu_vm *vm,
+ uint64_t start, uint64_t last,
+ uint32_t flags, uint64_t addr,
+@@ -755,11 +804,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ void *owner = AMDGPU_FENCE_OWNER_VM;
+ unsigned nptes, ncmds, ndw;
+ struct amdgpu_job *job;
+- struct amdgpu_ib *ib;
++ struct amdgpu_vm_update_params vm_update_params;
+ struct fence *f = NULL;
+ int r;
+
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
++ memset(&vm_update_params, 0, sizeof(vm_update_params));
++ vm_update_params.src = src;
++ vm_update_params.pages_addr = pages_addr;
+
+ /* sync to everything on unmapping */
+ if (!(flags & AMDGPU_PTE_VALID))
+@@ -776,11 +828,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ /* padding, etc. */
+ ndw = 64;
+
+- if ((gtt == &adev->gart) && (flags == gtt_flags)) {
++ if (vm_update_params.src) {
+ /* only copy commands needed */
+ ndw += ncmds * 7;
+
+- } else if (gtt) {
++ } else if (vm_update_params.pages_addr) {
+ /* header for write data commands */
+ ndw += ncmds * 4;
+
+@@ -799,7 +851,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ if (r)
+ return r;
+
+- ib = &job->ibs[0];
++ vm_update_params.ib = &job->ibs[0];
+
+ r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+ owner);
+@@ -810,11 +862,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ if (r)
+ goto error_free;
+
+- amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
+- addr, flags);
++ amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
++ last + 1, addr, flags);
+
+- amdgpu_ring_pad_ib(ring, ib);
+- WARN_ON(ib->length_dw > ndw);
++ amdgpu_ring_pad_ib(ring, vm_update_params.ib);
++ WARN_ON(vm_update_params.ib->length_dw > ndw);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &f);
+ if (r)
+@@ -837,11 +889,12 @@ error_free:
+ * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
+ *
+ * @adev: amdgpu_device pointer
+- * @gtt: GART instance to use for mapping
++ * @gtt_flags: flags as they are used for GTT
++ * @pages_addr: DMA addresses to use for mapping
+ * @vm: requested vm
+ * @mapping: mapped range and flags to use for the update
+ * @addr: addr to set the area to
+- * @gtt_flags: flags as they are used for GTT
++ * @flags: HW flags for the mapping
+ * @fence: optional resulting fence
+ *
+ * Split the mapping into smaller chunks so that each update fits
+@@ -849,16 +902,16 @@ error_free:
+ * Returns 0 for success, -EINVAL for failure.
+ */
+ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+- struct amdgpu_gart *gtt,
+ uint32_t gtt_flags,
++ dma_addr_t *pages_addr,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va_mapping *mapping,
+- uint64_t addr, struct fence **fence)
++ uint32_t flags, uint64_t addr,
++ struct fence **fence)
+ {
+ const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
+
+- uint64_t start = mapping->it.start;
+- uint32_t flags = gtt_flags;
++ uint64_t src = 0, start = mapping->it.start;
+ int r;
+
+ /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
+@@ -871,10 +924,15 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+
+ trace_amdgpu_vm_bo_update(mapping);
+
++ if (pages_addr) {
++ if (flags == gtt_flags)
++ src = adev->gart.table_addr + (addr >> 12) * 8;
++ addr = 0;
++ }
+ addr += mapping->offset;
+
+- if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
+- return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
++ if (!pages_addr || src)
++ return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+ start, mapping->it.last,
+ flags, addr, fence);
+
+@@ -882,7 +940,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+ uint64_t last;
+
+ last = min((uint64_t)mapping->it.last, start + max_size - 1);
+- r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
++ r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+ start, last, flags, addr,
+ fence);
+ if (r)
+@@ -913,16 +971,20 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ {
+ struct amdgpu_vm *vm = bo_va->vm;
+ struct amdgpu_bo_va_mapping *mapping;
+- struct amdgpu_gart *gtt = NULL;
+- uint32_t flags;
++ dma_addr_t *pages_addr = NULL;
++ uint32_t gtt_flags, flags;
+ uint64_t addr;
+ int r;
+
+ if (mem) {
++ struct ttm_dma_tt *ttm;
++
+ addr = (u64)mem->start << PAGE_SHIFT;
+ switch (mem->mem_type) {
+ case TTM_PL_TT:
+- gtt = &bo_va->bo->adev->gart;
++ ttm = container_of(bo_va->bo->tbo.ttm, struct
++ ttm_dma_tt, ttm);
++ pages_addr = ttm->dma_address;
+ break;
+
+ case TTM_PL_VRAM:
+@@ -937,6 +999,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ }
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
++ gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
+
+ spin_lock(&vm->status_lock);
+ if (!list_empty(&bo_va->vm_status))
+@@ -944,7 +1007,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ spin_unlock(&vm->status_lock);
+
+ list_for_each_entry(mapping, &bo_va->invalids, list) {
+- r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
++ r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
++ mapping, flags, addr,
+ &bo_va->last_pt_update);
+ if (r)
+ return r;
+@@ -985,22 +1049,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping;
+ int r;
+
+- spin_lock(&vm->freed_lock);
+ while (!list_empty(&vm->freed)) {
+ mapping = list_first_entry(&vm->freed,
+ struct amdgpu_bo_va_mapping, list);
+ list_del(&mapping->list);
+- spin_unlock(&vm->freed_lock);
+- r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
+- 0, NULL);
++
++ r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
++ 0, 0, NULL);
+ kfree(mapping);
+ if (r)
+ return r;
+
+- spin_lock(&vm->freed_lock);
+ }
+- spin_unlock(&vm->freed_lock);
+-
+ return 0;
+
+ }
+@@ -1027,9 +1087,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va, vm_status);
+ spin_unlock(&vm->status_lock);
+- mutex_lock(&bo_va->mutex);
++
+ r = amdgpu_vm_bo_update(adev, bo_va, NULL);
+- mutex_unlock(&bo_va->mutex);
+ if (r)
+ return r;
+
+@@ -1073,7 +1132,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ INIT_LIST_HEAD(&bo_va->valids);
+ INIT_LIST_HEAD(&bo_va->invalids);
+ INIT_LIST_HEAD(&bo_va->vm_status);
+- mutex_init(&bo_va->mutex);
++
+ list_add_tail(&bo_va->bo_list, &bo->va);
+
+ return bo_va;
+@@ -1125,9 +1184,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+- spin_lock(&vm->it_lock);
+ it = interval_tree_iter_first(&vm->va, saddr, eaddr);
+- spin_unlock(&vm->it_lock);
+ if (it) {
+ struct amdgpu_bo_va_mapping *tmp;
+ tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
+@@ -1151,13 +1208,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ mapping->offset = offset;
+ mapping->flags = flags;
+
+- mutex_lock(&bo_va->mutex);
+ list_add(&mapping->list, &bo_va->invalids);
+- mutex_unlock(&bo_va->mutex);
+- spin_lock(&vm->it_lock);
+ interval_tree_insert(&mapping->it, &vm->va);
+- spin_unlock(&vm->it_lock);
+- trace_amdgpu_vm_bo_map(bo_va, mapping);
+
+ /* Make sure the page tables are allocated */
+ saddr >>= amdgpu_vm_block_size;
+@@ -1209,9 +1261,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+
+ error_free:
+ list_del(&mapping->list);
+- spin_lock(&vm->it_lock);
+ interval_tree_remove(&mapping->it, &vm->va);
+- spin_unlock(&vm->it_lock);
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ kfree(mapping);
+
+@@ -1240,7 +1290,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ bool valid = true;
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+- mutex_lock(&bo_va->mutex);
++
+ list_for_each_entry(mapping, &bo_va->valids, list) {
+ if (mapping->it.start == saddr)
+ break;
+@@ -1254,25 +1304,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ break;
+ }
+
+- if (&mapping->list == &bo_va->invalids) {
+- mutex_unlock(&bo_va->mutex);
++ if (&mapping->list == &bo_va->invalids)
+ return -ENOENT;
+- }
+ }
+- mutex_unlock(&bo_va->mutex);
++
+ list_del(&mapping->list);
+- spin_lock(&vm->it_lock);
+ interval_tree_remove(&mapping->it, &vm->va);
+- spin_unlock(&vm->it_lock);
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+
+- if (valid) {
+- spin_lock(&vm->freed_lock);
++ if (valid)
+ list_add(&mapping->list, &vm->freed);
+- spin_unlock(&vm->freed_lock);
+- } else {
++ else
+ kfree(mapping);
+- }
+
+ return 0;
+ }
+@@ -1301,23 +1344,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+
+ list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
+ list_del(&mapping->list);
+- spin_lock(&vm->it_lock);
+ interval_tree_remove(&mapping->it, &vm->va);
+- spin_unlock(&vm->it_lock);
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+- spin_lock(&vm->freed_lock);
+ list_add(&mapping->list, &vm->freed);
+- spin_unlock(&vm->freed_lock);
+ }
+ list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+ list_del(&mapping->list);
+- spin_lock(&vm->it_lock);
+ interval_tree_remove(&mapping->it, &vm->va);
+- spin_unlock(&vm->it_lock);
+ kfree(mapping);
+ }
++
+ fence_put(bo_va->last_pt_update);
+- mutex_destroy(&bo_va->mutex);
+ kfree(bo_va);
+ }
+
+@@ -1361,17 +1398,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+ struct amd_sched_rq *rq;
+ int i, r;
+
+- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+- vm->ids[i].mgr_id = NULL;
+- vm->ids[i].flushed_updates = NULL;
+- }
++ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
++ vm->ids[i] = NULL;
+ vm->va = RB_ROOT;
++ vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
+ spin_lock_init(&vm->status_lock);
+ INIT_LIST_HEAD(&vm->invalidated);
+ INIT_LIST_HEAD(&vm->cleared);
+ INIT_LIST_HEAD(&vm->freed);
+- spin_lock_init(&vm->it_lock);
+- spin_lock_init(&vm->freed_lock);
++
+ pd_size = amdgpu_vm_directory_size(adev);
+ pd_entries = amdgpu_vm_num_pdes(adev);
+
+@@ -1458,14 +1493,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+
+ amdgpu_bo_unref(&vm->page_directory);
+ fence_put(vm->page_directory_fence);
+- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+- struct amdgpu_vm_id *id = &vm->ids[i];
+-
+- if (id->mgr_id)
+- atomic_long_cmpxchg(&id->mgr_id->owner,
+- (long)id, 0);
+- fence_put(id->flushed_updates);
+- }
+ }
+
+ /**
+@@ -1484,11 +1511,13 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
+ /* skip over VMID 0, since it is the system VM */
+ for (i = 1; i < adev->vm_manager.num_ids; ++i) {
+ amdgpu_vm_reset_id(adev, i);
++ amdgpu_sync_create(&adev->vm_manager.ids[i].active);
+ list_add_tail(&adev->vm_manager.ids[i].list,
+ &adev->vm_manager.ids_lru);
+ }
+
+ atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
++ atomic64_set(&adev->vm_manager.client_counter, 0);
+ }
+
+ /**
+@@ -1502,6 +1531,11 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+ {
+ unsigned i;
+
+- for (i = 0; i < AMDGPU_NUM_VM; ++i)
+- fence_put(adev->vm_manager.ids[i].active);
++ for (i = 0; i < AMDGPU_NUM_VM; ++i) {
++ struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
++
++ fence_put(adev->vm_manager.ids[i].first);
++ amdgpu_sync_free(&adev->vm_manager.ids[i].active);
++ fence_put(id->flushed_updates);
++ }
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
+index fece8f4..49daf6d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/atom.h
++++ b/drivers/gpu/drm/amd/amdgpu/atom.h
+@@ -92,7 +92,7 @@
+ #define ATOM_WS_AND_MASK 0x45
+ #define ATOM_WS_FB_WINDOW 0x46
+ #define ATOM_WS_ATTRIBUTES 0x47
+-#define ATOM_WS_REGPTR 0x48
++#define ATOM_WS_REGPTR 0x48
+
+ #define ATOM_IIO_NOP 0
+ #define ATOM_IIO_START 1
+diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+index 49aa350..49a39b1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
++++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+@@ -461,13 +461,14 @@ union set_pixel_clock {
+ PIXEL_CLOCK_PARAMETERS_V3 v3;
+ PIXEL_CLOCK_PARAMETERS_V5 v5;
+ PIXEL_CLOCK_PARAMETERS_V6 v6;
++ PIXEL_CLOCK_PARAMETERS_V7 v7;
+ };
+
+ /* on DCE5, make sure the voltage is high enough to support the
+ * required disp clk.
+ */
+ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+- u32 dispclk)
++ u32 dispclk)
+ {
+ u8 frev, crev;
+ int index;
+@@ -510,6 +511,49 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ }
+
++union set_dce_clock {
++ SET_DCE_CLOCK_PS_ALLOCATION_V1_1 v1_1;
++ SET_DCE_CLOCK_PS_ALLOCATION_V2_1 v2_1;
++};
++
++u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
++ u32 freq, u8 clk_type, u8 clk_src)
++{
++ u8 frev, crev;
++ int index;
++ union set_dce_clock args;
++ u32 ret_freq = 0;
++
++ memset(&args, 0, sizeof(args));
++
++ index = GetIndexIntoMasterTable(COMMAND, SetDCEClock);
++ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
++ &crev))
++ return 0;
++
++ switch (frev) {
++ case 2:
++ switch (crev) {
++ case 1:
++ args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
++ args.v2_1.asParam.ucDCEClkType = clk_type;
++ args.v2_1.asParam.ucDCEClkSrc = clk_src;
++ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
++ ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
++ break;
++ default:
++ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
++ return 0;
++ }
++ break;
++ default:
++ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
++ return 0;
++ }
++
++ return ret_freq;
++}
++
+ static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id)
+ {
+ if (ENCODER_MODE_IS_DP(encoder_mode)) {
+@@ -523,18 +567,18 @@ static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id)
+ }
+
+ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+- u32 crtc_id,
+- int pll_id,
+- u32 encoder_mode,
+- u32 encoder_id,
+- u32 clock,
+- u32 ref_div,
+- u32 fb_div,
+- u32 frac_fb_div,
+- u32 post_div,
+- int bpc,
+- bool ss_enabled,
+- struct amdgpu_atom_ss *ss)
++ u32 crtc_id,
++ int pll_id,
++ u32 encoder_mode,
++ u32 encoder_id,
++ u32 clock,
++ u32 ref_div,
++ u32 fb_div,
++ u32 frac_fb_div,
++ u32 post_div,
++ int bpc,
++ bool ss_enabled,
++ struct amdgpu_atom_ss *ss)
+ {
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = dev->dev_private;
+@@ -652,6 +696,34 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+ args.v6.ucEncoderMode = encoder_mode;
+ args.v6.ucPpll = pll_id;
+ break;
++ case 7:
++ args.v7.ulPixelClock = cpu_to_le32(clock * 10); /* 100 hz units */
++ args.v7.ucMiscInfo = 0;
++ if ((encoder_mode == ATOM_ENCODER_MODE_DVI) &&
++ (clock > 165000))
++ args.v7.ucMiscInfo |= PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN;
++ args.v7.ucCRTC = crtc_id;
++ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
++ switch (bpc) {
++ case 8:
++ default:
++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS;
++ break;
++ case 10:
++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4;
++ break;
++ case 12:
++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2;
++ break;
++ case 16:
++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1;
++ break;
++ }
++ }
++ args.v7.ucTransmitterID = encoder_id;
++ args.v7.ucEncoderMode = encoder_mode;
++ args.v7.ucPpll = pll_id;
++ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
+index c670833..0eeda8e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
++++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
+@@ -37,6 +37,8 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+ u32 dispclk);
++u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
++ u32 freq, u8 clk_type, u8 clk_src);
+ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+ u32 crtc_id,
+ int pll_id,
+diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+index 1e0bba2..48b6bd6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
++++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+@@ -298,6 +298,10 @@ bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
+ && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
+ adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
+
++ /* vertical FP must be at least 1 */
++ if (mode->crtc_vsync_start == mode->crtc_vdisplay)
++ adjusted_mode->crtc_vsync_start++;
++
+ /* get the native mode for scaling */
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_panel_mode_fixup(encoder, adjusted_mode);
+@@ -563,6 +567,7 @@ union dig_encoder_control {
+ DIG_ENCODER_CONTROL_PARAMETERS_V2 v2;
+ DIG_ENCODER_CONTROL_PARAMETERS_V3 v3;
+ DIG_ENCODER_CONTROL_PARAMETERS_V4 v4;
++ DIG_ENCODER_CONTROL_PARAMETERS_V5 v5;
+ };
+
+ void
+@@ -690,6 +695,47 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
+ else
+ args.v4.ucHPD_ID = hpd_id + 1;
+ break;
++ case 5:
++ switch (action) {
++ case ATOM_ENCODER_CMD_SETUP_PANEL_MODE:
++ args.v5.asDPPanelModeParam.ucAction = action;
++ args.v5.asDPPanelModeParam.ucPanelMode = panel_mode;
++ args.v5.asDPPanelModeParam.ucDigId = dig->dig_encoder;
++ break;
++ case ATOM_ENCODER_CMD_STREAM_SETUP:
++ args.v5.asStreamParam.ucAction = action;
++ args.v5.asStreamParam.ucDigId = dig->dig_encoder;
++ args.v5.asStreamParam.ucDigMode =
++ amdgpu_atombios_encoder_get_encoder_mode(encoder);
++ if (ENCODER_MODE_IS_DP(args.v5.asStreamParam.ucDigMode))
++ args.v5.asStreamParam.ucLaneNum = dp_lane_count;
++ else if (amdgpu_dig_monitor_is_duallink(encoder,
++ amdgpu_encoder->pixel_clock))
++ args.v5.asStreamParam.ucLaneNum = 8;
++ else
++ args.v5.asStreamParam.ucLaneNum = 4;
++ args.v5.asStreamParam.ulPixelClock =
++ cpu_to_le32(amdgpu_encoder->pixel_clock / 10);
++ args.v5.asStreamParam.ucBitPerColor =
++ amdgpu_atombios_encoder_get_bpc(encoder);
++ args.v5.asStreamParam.ucLinkRateIn270Mhz = dp_clock / 27000;
++ break;
++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_START:
++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1:
++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2:
++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3:
++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4:
++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE:
++ case ATOM_ENCODER_CMD_DP_VIDEO_OFF:
++ case ATOM_ENCODER_CMD_DP_VIDEO_ON:
++ args.v5.asCmdParam.ucAction = action;
++ args.v5.asCmdParam.ucDigId = dig->dig_encoder;
++ break;
++ default:
++ DRM_ERROR("Unsupported action 0x%x\n", action);
++ break;
++ }
++ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+@@ -710,11 +756,12 @@ union dig_transmitter_control {
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5;
++ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6 v6;
+ };
+
+ void
+ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action,
+- uint8_t lane_num, uint8_t lane_set)
++ uint8_t lane_num, uint8_t lane_set)
+ {
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
+@@ -1066,6 +1113,54 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
+ args.v5.ucDigEncoderSel = 1 << dig_encoder;
+ args.v5.ucDPLaneSet = lane_set;
+ break;
++ case 6:
++ args.v6.ucAction = action;
++ if (is_dp)
++ args.v6.ulSymClock = cpu_to_le32(dp_clock / 10);
++ else
++ args.v6.ulSymClock = cpu_to_le32(amdgpu_encoder->pixel_clock / 10);
++
++ switch (amdgpu_encoder->encoder_id) {
++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
++ if (dig->linkb)
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYB;
++ else
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYA;
++ break;
++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
++ if (dig->linkb)
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYD;
++ else
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYC;
++ break;
++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
++ if (dig->linkb)
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYF;
++ else
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYE;
++ break;
++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYG;
++ break;
++ }
++ if (is_dp)
++ args.v6.ucLaneNum = dp_lane_count;
++ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
++ args.v6.ucLaneNum = 8;
++ else
++ args.v6.ucLaneNum = 4;
++ args.v6.ucConnObjId = connector_object_id;
++ if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH)
++ args.v6.ucDPLaneSet = lane_set;
++ else
++ args.v6.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
++
++ if (hpd_id == AMDGPU_HPD_NONE)
++ args.v6.ucHPDSel = 0;
++ else
++ args.v6.ucHPDSel = hpd_id + 1;
++ args.v6.ucDigEncoderSel = 1 << dig_encoder;
++ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index f0c7b35..494104e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -27,6 +27,7 @@
+ #include "vi.h"
+ #include "vid.h"
+ #include "amdgpu_ucode.h"
++#include "amdgpu_atombios.h"
+ #include "clearstate_vi.h"
+
+ #include "gmc/gmc_8_2_d.h"
+@@ -51,6 +52,7 @@
+
+ #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
+ #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
++#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
+ #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
+
+ #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
+@@ -84,6 +86,8 @@ enum {
+ BPM_REG_FGCG_MAX
+ };
+
++#define RLC_FormatDirectRegListLength 14
++
+ MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
+ MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
+ MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
+@@ -117,6 +121,20 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
+ MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
+ MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
+
++MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
++
++MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
++
+ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
+ {
+ {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
+@@ -247,6 +265,66 @@ static const u32 tonga_mgcg_cgcg_init[] =
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ };
+
++static const u32 golden_settings_polaris11_a11[] = /* programmed for CHIP_POLARIS11 by gfx_v8_0_init_golden_registers() */
++{ /* presumably {register, AND mask, OR value} triples - TODO confirm against amdgpu_program_register_sequence() */
++ mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
++ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
++ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
++ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
++ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
++ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
++ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
++ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
++ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
++ mmSQ_CONFIG, 0x07f80000, 0x07180000,
++ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
++ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
++ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
++ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
++ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
++};
++
++static const u32 polaris11_golden_common_all[] = /* programmed for CHIP_POLARIS11, after golden_settings_polaris11_a11 */
++{ /* presumably {register, AND mask, OR value} triples - TODO confirm against amdgpu_program_register_sequence() */
++ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
++ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, /* same value as POLARIS11_GB_ADDR_CONFIG_GOLDEN */
++ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
++ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
++ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
++ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
++};
++
++static const u32 golden_settings_polaris10_a11[] = /* programmed for CHIP_POLARIS10 by gfx_v8_0_init_golden_registers() */
++{ /* presumably {register, AND mask, OR value} triples - TODO confirm against amdgpu_program_register_sequence() */
++ mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
++ mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
++ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
++ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
++ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
++ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
++ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
++ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
++ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
++ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
++ mmSQ_CONFIG, 0x07f80000, 0x07180000,
++ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
++ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
++ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
++ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, /* NOTE(review): no mmTCP_CHAN_STEER_LO entry here, unlike the polaris11 table - confirm intended */
++};
++
++static const u32 polaris10_golden_common_all[] = /* programmed for CHIP_POLARIS10, after golden_settings_polaris10_a11 */
++{ /* presumably {register, AND mask, OR value} triples - TODO confirm against amdgpu_program_register_sequence() */
++ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
++ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
++ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
++ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, /* same value as TONGA_GB_ADDR_CONFIG_GOLDEN */
++ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
++ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
++ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
++ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
++};
++
+ static const u32 fiji_golden_common_all[] =
+ {
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+@@ -527,7 +605,7 @@ static const u32 stoney_golden_settings_a11[] =
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+- mmTCC_CTRL, 0x00100000, 0xf31fff7f,
++ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
+@@ -558,6 +636,9 @@ static const u32 stoney_mgcg_cgcg_init[] =
+ static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
+ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
++static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
++static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
++static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
+
+ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
+ {
+@@ -596,6 +677,22 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
+ tonga_golden_common_all,
+ (const u32)ARRAY_SIZE(tonga_golden_common_all));
+ break;
++ case CHIP_POLARIS11:
++ amdgpu_program_register_sequence(adev,
++ golden_settings_polaris11_a11,
++ (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
++ amdgpu_program_register_sequence(adev,
++ polaris11_golden_common_all,
++ (const u32)ARRAY_SIZE(polaris11_golden_common_all));
++ break;
++ case CHIP_POLARIS10:
++ amdgpu_program_register_sequence(adev,
++ golden_settings_polaris10_a11,
++ (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
++ amdgpu_program_register_sequence(adev,
++ polaris10_golden_common_all,
++ (const u32)ARRAY_SIZE(polaris10_golden_common_all));
++ break;
+ case CHIP_CARRIZO:
+ amdgpu_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+@@ -706,7 +803,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
+ ib.ptr[2] = 0xDEADBEEF;
+ ib.length_dw = 3;
+
+- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
+ if (r)
+ goto err2;
+
+@@ -747,6 +844,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
++ const struct rlc_firmware_header_v2_0 *rlc_hdr;
++ unsigned int *tmp = NULL, i;
+
+ DRM_DEBUG("\n");
+
+@@ -763,6 +862,12 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
+ case CHIP_FIJI:
+ chip_name = "fiji";
+ break;
++ case CHIP_POLARIS11:
++ chip_name = "polaris11";
++ break;
++ case CHIP_POLARIS10:
++ chip_name = "polaris10";
++ break;
+ case CHIP_STONEY:
+ chip_name = "stoney";
+ break;
+@@ -808,9 +913,49 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
+- adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+- adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
++ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
++ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
++ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
++
++ adev->gfx.rlc.save_and_restore_offset =
++ le32_to_cpu(rlc_hdr->save_and_restore_offset);
++ adev->gfx.rlc.clear_state_descriptor_offset =
++ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
++ adev->gfx.rlc.avail_scratch_ram_locations =
++ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
++ adev->gfx.rlc.reg_restore_list_size =
++ le32_to_cpu(rlc_hdr->reg_restore_list_size);
++ adev->gfx.rlc.reg_list_format_start =
++ le32_to_cpu(rlc_hdr->reg_list_format_start);
++ adev->gfx.rlc.reg_list_format_separate_start =
++ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
++ adev->gfx.rlc.starting_offsets_start =
++ le32_to_cpu(rlc_hdr->starting_offsets_start);
++ adev->gfx.rlc.reg_list_format_size_bytes =
++ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
++ adev->gfx.rlc.reg_list_size_bytes =
++ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
++
++ adev->gfx.rlc.register_list_format =
++ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
++ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
++
++ if (!adev->gfx.rlc.register_list_format) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
++ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
++ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
++ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
++
++ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
++
++ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
++ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
++ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
++ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+@@ -911,6 +1056,270 @@ out:
+ return err;
+ }
+
++/* Write the clear-state packet stream (preamble, SECT_CONTEXT extents, per-ASIC
++ * PA_SC_RASTER_CONFIG pair, CLEAR_STATE) to @buffer; no-op on NULL inputs. */
++static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
++				    volatile u32 *buffer)
++{
++	const struct cs_section_def *sect;
++	const struct cs_extent_def *ext;
++	u32 raster_config, raster_config_1;
++	u32 count = 0, i;
++
++	if (adev->gfx.rlc.cs_data == NULL || buffer == NULL)
++		return;
++
++	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
++	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
++
++	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
++	buffer[count++] = cpu_to_le32(0x80000000);
++	buffer[count++] = cpu_to_le32(0x80000000);
++
++	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
++		for (ext = sect->section; ext->extent != NULL; ++ext) {
++			/* any other section id aborts, leaving the stream unfinished */
++			if (sect->id != SECT_CONTEXT)
++				return;
++			buffer[count++] =
++				cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
++			buffer[count++] = cpu_to_le32(ext->reg_index -
++					PACKET3_SET_CONTEXT_REG_START);
++			for (i = 0; i < ext->reg_count; i++)
++				buffer[count++] = cpu_to_le32(ext->extent[i]);
++		}
++	}
++
++	switch (adev->asic_type) {
++	case CHIP_TONGA:
++	case CHIP_POLARIS10:
++		raster_config = 0x16000012;
++		raster_config_1 = 0x0000002A;
++		break;
++	case CHIP_POLARIS11:
++		raster_config = 0x16000012;
++		raster_config_1 = 0x00000000;
++		break;
++	case CHIP_FIJI:
++		raster_config = 0x3a00161a;
++		raster_config_1 = 0x0000002e;
++		break;
++	case CHIP_TOPAZ:
++	case CHIP_CARRIZO:
++		raster_config = 0x00000002;
++		raster_config_1 = 0x00000000;
++		break;
++	case CHIP_STONEY:
++	default:
++		raster_config = 0x00000000;
++		raster_config_1 = 0x00000000;
++		break;
++	}
++
++	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
++	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
++			PACKET3_SET_CONTEXT_REG_START);
++	buffer[count++] = cpu_to_le32(raster_config);
++	buffer[count++] = cpu_to_le32(raster_config_1);
++
++	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
++	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
++
++	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
++	buffer[count++] = cpu_to_le32(0);
++}
++
++/**
++ * cz_init_cp_jump_table - pack the CP microcode jump tables into one buffer
++ * @adev: amdgpu device; the gfx firmware images read below must be loaded
++ *
++ * Visits the CE, PFP, ME and MEC images (plus MEC2 on Carrizo) in that
++ * order. From each image, jt_size dwords starting jt_offset dwords into the
++ * ucode array are copied to rlc.cp_table_ptr, every table placed directly
++ * after the previous one, and each dword is stored little-endian.
++ * Nothing here checks the running total against the size of the
++ * destination buffer.
++ */
++static void cz_init_cp_jump_table(struct amdgpu_device *adev)
++{
++	const struct gfx_firmware_header_v1_0 *hdr;
++	volatile u32 *dst_ptr;
++	const __le32 *fw_data;
++	const u8 *ucode;
++	u32 table_offset, table_size;
++	u32 bo_offset = 0;	/* dwords written so far */
++	int max_me;
++	int me, i;
++
++	/* MEC2 (index 4) is only visited on Carrizo */
++	max_me = (adev->asic_type == CHIP_CARRIZO) ? 5 : 4;
++
++	/* write the cp table buffer */
++	dst_ptr = adev->gfx.rlc.cp_table_ptr;
++
++	for (me = 0; me < max_me; me++) {
++		switch (me) {
++		case 0:
++			ucode = adev->gfx.ce_fw->data;
++			break;
++		case 1:
++			ucode = adev->gfx.pfp_fw->data;
++			break;
++		case 2:
++			ucode = adev->gfx.me_fw->data;
++			break;
++		case 3:
++			ucode = adev->gfx.mec_fw->data;
++			break;
++		default:
++			ucode = adev->gfx.mec2_fw->data;
++			break;
++		}
++
++		/*
++		 * Every image is read through the v1.0 gfx header, which
++		 * gives the ucode array offset and the jump table location.
++		 */
++		hdr = (const struct gfx_firmware_header_v1_0 *)ucode;
++		fw_data = (const __le32 *)
++			(ucode + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
++		table_offset = le32_to_cpu(hdr->jt_offset);
++		table_size = le32_to_cpu(hdr->jt_size);
++
++		/* append this table right behind the previous one */
++		for (i = 0; i < table_size; i++)
++			dst_ptr[bo_offset + i] =
++				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
++
++		bo_offset += table_size;
++	}
++}
++
++/* Unpin and release the RLC clear-state and CP jump-table BOs, if any. */
++static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
++{
++	int r;
++
++	/* clear state block */
++	if (adev->gfx.rlc.clear_state_obj) {
++		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
++		if (r == 0) { /* unpin/unreserve only with the reservation held */
++			amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
++			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
++		} else
++			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
++		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
++		adev->gfx.rlc.clear_state_obj = NULL;
++	}
++	/* jump table block */
++	if (adev->gfx.rlc.cp_table_obj) {
++		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
++		if (r == 0) { /* unpin/unreserve only with the reservation held */
++			amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
++			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
++		} else
++			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
++		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
++		adev->gfx.rlc.cp_table_obj = NULL;
++	}
++}
++
++/*
++ * Allocate, pin and fill the RLC clear-state BO and, on Carrizo/Stoney, the
++ * CP jump-table BO. Returns 0 on success or the first failing call's error.
++ */
++static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
++{
++	u32 dws;
++	int r;
++
++	adev->gfx.rlc.cs_data = vi_cs_data;
++
++	if (adev->gfx.rlc.cs_data) {
++		/* clear state block */
++		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
++
++		if (adev->gfx.rlc.clear_state_obj == NULL) {
++			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
++					     AMDGPU_GEM_DOMAIN_VRAM,
++					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
++					     NULL, NULL,
++					     &adev->gfx.rlc.clear_state_obj);
++			if (r) {
++				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
++				gfx_v8_0_rlc_fini(adev);
++				return r;
++			}
++		}
++		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
++		if (unlikely(r != 0)) {
++			gfx_v8_0_rlc_fini(adev);
++			return r;
++		}
++		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
++				  &adev->gfx.rlc.clear_state_gpu_addr);
++		if (r) {
++			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
++			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
++			gfx_v8_0_rlc_fini(adev);
++			return r;
++		}
++
++		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
++		if (r) { /* still reserved: release it, rlc_fini reserves again */
++			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
++			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
++			gfx_v8_0_rlc_fini(adev);
++			return r;
++		}
++		/* set up the cs buffer */
++		gfx_v8_0_get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
++		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
++		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
++	}
++
++	if ((adev->asic_type == CHIP_CARRIZO) ||
++	    (adev->asic_type == CHIP_STONEY)) {
++		adev->gfx.rlc.cp_table_size = (96 * 5 * 4) + (64 * 1024); /* JT + GDS */
++		if (adev->gfx.rlc.cp_table_obj == NULL) {
++			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
++					     AMDGPU_GEM_DOMAIN_VRAM,
++					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
++					     NULL, NULL,
++					     &adev->gfx.rlc.cp_table_obj);
++			if (r) {
++				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
++				return r;
++			}
++		}
++
++		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
++		if (unlikely(r != 0)) {
++			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
++			return r;
++		}
++		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
++				  &adev->gfx.rlc.cp_table_gpu_addr);
++		if (r) {
++			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
++			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
++			return r;
++		}
++		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
++		if (r) { /* still reserved: do not leak the reservation */
++			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
++			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
++			return r;
++		}
++
++		cz_init_cp_jump_table(adev);
++
++		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
++		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
++	}
++
++	return 0;
++}
++
+ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
+ {
+ int r;
+@@ -1262,7 +1671,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* shedule the ib on the ring */
+- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
+ if (r) {
+ DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ goto fail;
+@@ -1296,12 +1705,13 @@ fail:
+ return r;
+ }
+
+-static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
++static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
+ {
+ u32 gb_addr_config;
+ u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+ u32 tmp;
++ int ret;
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+@@ -1338,6 +1748,34 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
+ break;
++ case CHIP_POLARIS11:
++ ret = amdgpu_atombios_get_gfx_info(adev);
++ if (ret)
++ return ret;
++ adev->gfx.config.max_gprs = 256;
++ adev->gfx.config.max_gs_threads = 32;
++ adev->gfx.config.max_hw_contexts = 8;
++
++ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
++ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
++ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
++ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
++ gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
++ break;
++ case CHIP_POLARIS10:
++ ret = amdgpu_atombios_get_gfx_info(adev);
++ if (ret)
++ return ret;
++ adev->gfx.config.max_gprs = 256;
++ adev->gfx.config.max_gs_threads = 32;
++ adev->gfx.config.max_hw_contexts = 8;
++
++ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
++ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
++ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
++ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
++ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
++ break;
+ case CHIP_TONGA:
+ adev->gfx.config.max_shader_engines = 4;
+ adev->gfx.config.max_tile_pipes = 8;
+@@ -1520,6 +1958,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
+ break;
+ }
+ adev->gfx.config.gb_addr_config = gb_addr_config;
++
++ return 0;
+ }
+
+ static int gfx_v8_0_sw_init(void *handle)
+@@ -1553,6 +1993,12 @@ static int gfx_v8_0_sw_init(void *handle)
+ return r;
+ }
+
++ r = gfx_v8_0_rlc_init(adev);
++ if (r) {
++ DRM_ERROR("Failed to init rlc BOs!\n");
++ return r;
++ }
++
+ r = gfx_v8_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+@@ -1570,7 +2016,7 @@ static int gfx_v8_0_sw_init(void *handle)
+ ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
+ }
+
+- r = amdgpu_ring_init(adev, ring, 1024 * 1024,
++ r = amdgpu_ring_init(adev, ring, 1024,
+ PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
+ &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
+ AMDGPU_RING_TYPE_GFX);
+@@ -1594,10 +2040,10 @@ static int gfx_v8_0_sw_init(void *handle)
+ ring->me = 1; /* first MEC */
+ ring->pipe = i / 8;
+ ring->queue = i % 8;
+- sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
++ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+- r = amdgpu_ring_init(adev, ring, 1024 * 1024,
++ r = amdgpu_ring_init(adev, ring, 1024,
+ PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_TYPE_COMPUTE);
+@@ -1629,7 +2075,9 @@ static int gfx_v8_0_sw_init(void *handle)
+
+ adev->gfx.ce_ram_size = 0x8000;
+
+- gfx_v8_0_gpu_early_init(adev);
++ r = gfx_v8_0_gpu_early_init(adev);
++ if (r)
++ return r;
+
+ return 0;
+ }
+@@ -1650,6 +2098,10 @@ static int gfx_v8_0_sw_fini(void *handle)
+
+ gfx_v8_0_mec_fini(adev);
+
++ gfx_v8_0_rlc_fini(adev);
++
++ kfree(adev->gfx.rlc.register_list_format);
++
+ return 0;
+ }
+
+@@ -2219,77 +2671,481 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+- case CHIP_STONEY:
++ case CHIP_POLARIS11:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+- PIPE_CONFIG(ADDR_SURF_P2));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+- PIPE_CONFIG(ADDR_SURF_P2) |
+- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++
++ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_8_BANK));
++
++ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
++ NUM_BANKS(ADDR_SURF_4_BANK));
++
++ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
++ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
++
++ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
++ if (reg_offset != 7)
++ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
++
++ break;
++ case CHIP_POLARIS10:
++ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
++ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++
++ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
++ NUM_BANKS(ADDR_SURF_16_BANK));
++
++ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
++ NUM_BANKS(ADDR_SURF_8_BANK));
++
++ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
++ NUM_BANKS(ADDR_SURF_4_BANK));
++
++ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
++ NUM_BANKS(ADDR_SURF_4_BANK));
++
++ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
++ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
++
++ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
++ if (reg_offset != 7)
++ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
++
++ break;
++ case CHIP_STONEY:
++ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
++ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
++ PIPE_CONFIG(ADDR_SURF_P2));
++ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
++ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
++ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
++ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
++ PIPE_CONFIG(ADDR_SURF_P2) |
++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+@@ -2695,6 +3551,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
+ gfx_v8_0_tiling_mode_table_init(adev);
+
+ gfx_v8_0_setup_rb(adev);
++ gfx_v8_0_get_cu_info(adev);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+@@ -2775,17 +3632,263 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
+ }
+ }
+
+-static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+- bool enable)
++static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
++ bool enable)
++{ /* Switch the four CP_INT_CNTL_RING0 *_INT_ENABLE fields below on or off together. */
++ u32 tmp = RREG32(mmCP_INT_CNTL_RING0); /* start from the current register value */
++
++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
++
++ WREG32(mmCP_INT_CNTL_RING0, tmp); /* single write-back carrying all four field updates */
++}
++
++static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
++{
++ /* csib: write the clear-state buffer address (high, then low dword) and its size to the RLC_CSIB_* registers */
++ WREG32(mmRLC_CSIB_ADDR_HI,
++ adev->gfx.rlc.clear_state_gpu_addr >> 32);
++ WREG32(mmRLC_CSIB_ADDR_LO,
++ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); /* low 32 bits with bits 1:0 cleared */
++ WREG32(mmRLC_CSIB_LENGTH,
++ adev->gfx.rlc.clear_state_size);
++}
++
++static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
++ int ind_offset,
++ int list_size,
++ int *unique_indices,
++ int *indices_count,
++ int max_indices,
++ int *ind_start_offsets,
++ int *offset_count,
++ int max_offset)
++{ /* Scan register_list_format[ind_offset..list_size): record entry starts, dedupe index words in place. */
++ int indices;
++ bool new_entry = true;
++
++ for (; ind_offset < list_size; ind_offset++) {
++
++ if (new_entry) {
++ new_entry = false;
++ ind_start_offsets[*offset_count] = ind_offset; /* remember where this entry begins */
++ *offset_count = *offset_count + 1;
++ BUG_ON(*offset_count >= max_offset); /* NOTE(review): also trips when the table becomes exactly full */
++ }
++
++ if (register_list_format[ind_offset] == 0xFFFFFFFF) { /* 0xFFFFFFFF closes the current entry */
++ new_entry = true;
++ continue;
++ }
++
++ ind_offset += 2; /* step to the third word of this record; NOTE(review): not re-checked against list_size */
++
++ /* look for the matching index */
++ for (indices = 0;
++ indices < *indices_count;
++ indices++) {
++ if (unique_indices[indices] ==
++ register_list_format[ind_offset])
++ break;
++ }
++
++ if (indices >= *indices_count) { /* value not seen before: append it to unique_indices */
++ unique_indices[*indices_count] =
++ register_list_format[ind_offset];
++ indices = *indices_count;
++ *indices_count = *indices_count + 1;
++ BUG_ON(*indices_count >= max_indices); /* NOTE(review): same "exactly full" behaviour as the check above */
++ }
++
++ register_list_format[ind_offset] = indices; /* overwrite the raw value with its slot in unique_indices */
++ }
++}
++
++static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
++{ /* Upload the RLC save/restore lists; returns 0, or -ENOMEM if the scratch copy cannot be allocated. */
++ int i, temp, data;
++ int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
++ int indices_count = 0;
++ int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
++ int offset_count = 0;
++
++ int list_size;
++ unsigned int *register_list_format = /* NOTE(review): later passed to a parameter declared as int * */
++ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
++ if (register_list_format == NULL)
++ return -ENOMEM;
++ memcpy(register_list_format, adev->gfx.rlc.register_list_format, /* private copy: the parser below rewrites entries in place */
++ adev->gfx.rlc.reg_list_format_size_bytes);
++
++ gfx_v8_0_parse_ind_reg_list(register_list_format,
++ RLC_FormatDirectRegListLength,
++ adev->gfx.rlc.reg_list_format_size_bytes >> 2,
++ unique_indices,
++ &indices_count,
++ sizeof(unique_indices) / sizeof(int),
++ indirect_start_offsets,
++ &offset_count,
++ sizeof(indirect_start_offsets)/sizeof(int));
++
++ /* save and restore list: turn on address auto-increment, then stream register_restore[] into SRM ARAM from address 0 */
++ temp = RREG32(mmRLC_SRM_CNTL);
++ temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
++ WREG32(mmRLC_SRM_CNTL, temp);
++
++ WREG32(mmRLC_SRM_ARAM_ADDR, 0);
++ for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
++ WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
++
++ /* indirect list: write the parsed format list to GPM scratch, starting at reg_list_format_start */
++ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
++ for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
++ WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
++
++ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; /* bytes -> dwords */
++ list_size = list_size >> 1; /* dwords -> pairs (presumably register/value pairs - TODO confirm) */
++ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
++ WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
++
++ /* starting offsets: write every slot of indirect_start_offsets[]; slots the parser did not fill stay 0 */
++ WREG32(mmRLC_GPM_SCRATCH_ADDR,
++ adev->gfx.rlc.starting_offsets_start);
++ for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
++ WREG32(mmRLC_GPM_SCRATCH_DATA,
++ indirect_start_offsets[i]);
++
++ /* unique indices: low 18 bits go to INDEX_CNTL_ADDR_n, bits 20 and up go to INDEX_CNTL_DATA_n */
++ temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
++ data = mmRLC_SRM_INDEX_CNTL_DATA_0;
++ for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
++ amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
++ amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
++ }
++ kfree(register_list_format); /* scratch copy is no longer needed once written out */
++
++ return 0;
++}
++
++static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
++{
++ uint32_t data;
++
++ data = RREG32(mmRLC_SRM_CNTL);
++ data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; /* set SRM_ENABLE, leaving the other RLC_SRM_CNTL bits as read */
++ WREG32(mmRLC_SRM_CNTL, data);
++}
++
++static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
++{ /* Program PG poll count, delays and idle threshold; does nothing unless a GFX PG/SMG/DMG flag is set. */
++ uint32_t data;
++
++ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
++ AMD_PG_SUPPORT_GFX_SMG |
++ AMD_PG_SUPPORT_GFX_DMG)) {
++ data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
++ data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
++ data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); /* IDLE_POLL_COUNT field = 0x60 */
++ WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
++
++ data = 0; /* RLC_PG_DELAY is rebuilt from zero rather than read-modify-written */
++ data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
++ data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
++ data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
++ data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
++ WREG32(mmRLC_PG_DELAY, data);
++
++ data = RREG32(mmRLC_PG_DELAY_2);
++ data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
++ data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); /* SERDES_CMD_DELAY field = 0x3 */
++ WREG32(mmRLC_PG_DELAY_2, data);
++
++ data = RREG32(mmRLC_AUTO_PG_CTRL);
++ data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
++ data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); /* threshold field = 0x55f0 */
++ WREG32(mmRLC_AUTO_PG_CTRL, data);
++ }
++}
++
++static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
++ bool enable)
+ {
+- u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
++ u32 data, orig;
+
+- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
++ orig = data = RREG32(mmRLC_PG_CNTL); /* keep the value as read so an unchanged register is not rewritten */
+
+- WREG32(mmCP_INT_CNTL_RING0, tmp);
++ if (enable)
++ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; /* set the SMU_CLK_SLOWDOWN_ON_PU_ENABLE bit */
++ else
++ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
++
++ if (orig != data) /* write only when the bit actually changed */
++ WREG32(mmRLC_PG_CNTL, data);
++}
++
++static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
++ bool enable)
++{
++ u32 data, orig;
++
++ orig = data = RREG32(mmRLC_PG_CNTL); /* keep the value as read so an unchanged register is not rewritten */
++
++ if (enable)
++ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; /* set the SMU_CLK_SLOWDOWN_ON_PD_ENABLE bit */
++ else
++ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
++
++ if (orig != data) /* write only when the bit actually changed */
++ WREG32(mmRLC_PG_CNTL, data);
++}
++
++static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
++{
++ u32 data, orig;
++
++ orig = data = RREG32(mmRLC_PG_CNTL); /* keep the value as read so an unchanged register is not rewritten */
++
++ if (enable)
++ data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; /* the register bit is a disable bit, so enabling clears it */
++ else
++ data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
++
++ if (orig != data) /* write only when the bit actually changed */
++ WREG32(mmRLC_PG_CNTL, data);
++}
++
++static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
++{ /* One-time power-gating setup; skipped entirely when none of the pg_flags tested below is set. */
++ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
++ AMD_PG_SUPPORT_GFX_SMG |
++ AMD_PG_SUPPORT_GFX_DMG |
++ AMD_PG_SUPPORT_CP |
++ AMD_PG_SUPPORT_GDS |
++ AMD_PG_SUPPORT_RLC_SMU_HS)) {
++ gfx_v8_0_init_csb(adev);
++ gfx_v8_0_init_save_restore_list(adev); /* NOTE(review): return value (may be -ENOMEM) is ignored here */
++ gfx_v8_0_enable_save_restore_machine(adev);
++
++ if ((adev->asic_type == CHIP_CARRIZO) ||
++ (adev->asic_type == CHIP_STONEY)) {
++ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); /* address is written shifted right by 8 */
++ gfx_v8_0_init_power_gating(adev);
++ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
++ if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { /* both slow-down bits follow the RLC_SMU_HS flag */
++ cz_enable_sck_slow_down_on_power_up(adev, true);
++ cz_enable_sck_slow_down_on_power_down(adev, true);
++ } else {
++ cz_enable_sck_slow_down_on_power_up(adev, false);
++ cz_enable_sck_slow_down_on_power_down(adev, false);
++ }
++ if (adev->pg_flags & AMD_PG_SUPPORT_CP) /* CP power gating follows the CP flag */
++ cz_enable_cp_power_gating(adev, true);
++ else
++ cz_enable_cp_power_gating(adev, false);
++ } else if (adev->asic_type == CHIP_POLARIS11) { /* Polaris11 gets only the delay/threshold programming */
++ gfx_v8_0_init_power_gating(adev);
++ }
++ }
+ }
+
+ void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
+@@ -2858,12 +3961,17 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
+
+ /* disable CG */
+ WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
++ if (adev->asic_type == CHIP_POLARIS11 ||
++ adev->asic_type == CHIP_POLARIS10)
++ WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
+
+ /* disable PG */
+ WREG32(mmRLC_PG_CNTL, 0);
+
+ gfx_v8_0_rlc_reset(adev);
+
++ gfx_v8_0_init_pg(adev);
++
+ if (!adev->pp_enabled) {
+ if (!adev->firmware.smu_load) {
+ /* legacy rlc firmware loading */
+@@ -3035,9 +4143,14 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
+ amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
++ case CHIP_POLARIS10:
+ amdgpu_ring_write(ring, 0x16000012);
+ amdgpu_ring_write(ring, 0x0000002A);
+ break;
++ case CHIP_POLARIS11:
++ amdgpu_ring_write(ring, 0x16000012);
++ amdgpu_ring_write(ring, 0x00000000);
++ break;
+ case CHIP_FIJI:
+ amdgpu_ring_write(ring, 0x3a00161a);
+ amdgpu_ring_write(ring, 0x0000002e);
+@@ -3122,6 +4235,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
++ DOORBELL_HIT, 0);
++ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+@@ -3679,7 +4794,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
+ if (use_doorbell) {
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_FIJI) ||
+- (adev->asic_type == CHIP_STONEY)) {
++ (adev->asic_type == CHIP_STONEY) ||
++ (adev->asic_type == CHIP_POLARIS11) ||
++ (adev->asic_type == CHIP_POLARIS10)) {
+ WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
+ AMDGPU_DOORBELL_KIQ << 2);
+ WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
+@@ -3713,7 +4830,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
+ mqd->cp_hqd_persistent_state = tmp;
+- if (adev->asic_type == CHIP_STONEY) {
++ if (adev->asic_type == CHIP_STONEY ||
++ adev->asic_type == CHIP_POLARIS11 ||
++ adev->asic_type == CHIP_POLARIS10) {
+ tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
+ WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
+@@ -3845,6 +4964,9 @@ static int gfx_v8_0_hw_fini(void *handle)
+ gfx_v8_0_rlc_stop(adev);
+ gfx_v8_0_cp_compute_fini(adev);
+
++ amdgpu_set_powergating_state(adev,
++ AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
++
+ return 0;
+ }
+
+@@ -3889,185 +5011,6 @@ static int gfx_v8_0_wait_for_idle(void *handle)
+ return -ETIMEDOUT;
+ }
+
+-static void gfx_v8_0_print_status(void *handle)
+-{
+- int i;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+- dev_info(adev->dev, "GFX 8.x registers\n");
+- dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
+- RREG32(mmGRBM_STATUS));
+- dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
+- RREG32(mmGRBM_STATUS2));
+- dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+- RREG32(mmGRBM_STATUS_SE0));
+- dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+- RREG32(mmGRBM_STATUS_SE1));
+- dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
+- RREG32(mmGRBM_STATUS_SE2));
+- dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
+- RREG32(mmGRBM_STATUS_SE3));
+- dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
+- dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
+- RREG32(mmCP_STALLED_STAT1));
+- dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
+- RREG32(mmCP_STALLED_STAT2));
+- dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
+- RREG32(mmCP_STALLED_STAT3));
+- dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
+- RREG32(mmCP_CPF_BUSY_STAT));
+- dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
+- RREG32(mmCP_CPF_STALLED_STAT1));
+- dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
+- dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
+- dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
+- RREG32(mmCP_CPC_STALLED_STAT1));
+- dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
+-
+- for (i = 0; i < 32; i++) {
+- dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
+- i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
+- }
+- for (i = 0; i < 16; i++) {
+- dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
+- i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
+- }
+- for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+- dev_info(adev->dev, " se: %d\n", i);
+- gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
+- dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
+- RREG32(mmPA_SC_RASTER_CONFIG));
+- dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
+- RREG32(mmPA_SC_RASTER_CONFIG_1));
+- }
+- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+-
+- dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
+- RREG32(mmGB_ADDR_CONFIG));
+- dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
+- RREG32(mmHDP_ADDR_CONFIG));
+- dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
+- RREG32(mmDMIF_ADDR_CALC));
+-
+- dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
+- RREG32(mmCP_MEQ_THRESHOLDS));
+- dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
+- RREG32(mmSX_DEBUG_1));
+- dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
+- RREG32(mmTA_CNTL_AUX));
+- dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
+- RREG32(mmSPI_CONFIG_CNTL));
+- dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
+- RREG32(mmSQ_CONFIG));
+- dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
+- RREG32(mmDB_DEBUG));
+- dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
+- RREG32(mmDB_DEBUG2));
+- dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
+- RREG32(mmDB_DEBUG3));
+- dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
+- RREG32(mmCB_HW_CONTROL));
+- dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
+- RREG32(mmSPI_CONFIG_CNTL_1));
+- dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
+- RREG32(mmPA_SC_FIFO_SIZE));
+- dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
+- RREG32(mmVGT_NUM_INSTANCES));
+- dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
+- RREG32(mmCP_PERFMON_CNTL));
+- dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
+- RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
+- dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
+- RREG32(mmVGT_CACHE_INVALIDATION));
+- dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
+- RREG32(mmVGT_GS_VERTEX_REUSE));
+- dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
+- RREG32(mmPA_SC_LINE_STIPPLE_STATE));
+- dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
+- RREG32(mmPA_CL_ENHANCE));
+- dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
+- RREG32(mmPA_SC_ENHANCE));
+-
+- dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
+- RREG32(mmCP_ME_CNTL));
+- dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
+- RREG32(mmCP_MAX_CONTEXT));
+- dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
+- RREG32(mmCP_ENDIAN_SWAP));
+- dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
+- RREG32(mmCP_DEVICE_ID));
+-
+- dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
+- RREG32(mmCP_SEM_WAIT_TIMER));
+-
+- dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
+- RREG32(mmCP_RB_WPTR_DELAY));
+- dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
+- RREG32(mmCP_RB_VMID));
+- dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
+- RREG32(mmCP_RB0_CNTL));
+- dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
+- RREG32(mmCP_RB0_WPTR));
+- dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
+- RREG32(mmCP_RB0_RPTR_ADDR));
+- dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
+- RREG32(mmCP_RB0_RPTR_ADDR_HI));
+- dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
+- RREG32(mmCP_RB0_CNTL));
+- dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
+- RREG32(mmCP_RB0_BASE));
+- dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
+- RREG32(mmCP_RB0_BASE_HI));
+- dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
+- RREG32(mmCP_MEC_CNTL));
+- dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
+- RREG32(mmCP_CPF_DEBUG));
+-
+- dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
+- RREG32(mmSCRATCH_ADDR));
+- dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
+- RREG32(mmSCRATCH_UMSK));
+-
+- dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
+- RREG32(mmCP_INT_CNTL_RING0));
+- dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
+- RREG32(mmRLC_LB_CNTL));
+- dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
+- RREG32(mmRLC_CNTL));
+- dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
+- RREG32(mmRLC_CGCG_CGLS_CTRL));
+- dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
+- RREG32(mmRLC_LB_CNTR_INIT));
+- dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
+- RREG32(mmRLC_LB_CNTR_MAX));
+- dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
+- RREG32(mmRLC_LB_INIT_CU_MASK));
+- dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
+- RREG32(mmRLC_LB_PARAMS));
+- dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
+- RREG32(mmRLC_LB_CNTL));
+- dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
+- RREG32(mmRLC_MC_CNTL));
+- dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
+- RREG32(mmRLC_UCODE_CNTL));
+-
+- mutex_lock(&adev->srbm_mutex);
+- for (i = 0; i < 16; i++) {
+- vi_srbm_select(adev, 0, 0, 0, i);
+- dev_info(adev->dev, " VM %d:\n", i);
+- dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
+- RREG32(mmSH_MEM_CONFIG));
+- dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
+- RREG32(mmSH_MEM_APE1_BASE));
+- dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
+- RREG32(mmSH_MEM_APE1_LIMIT));
+- dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
+- RREG32(mmSH_MEM_BASES));
+- }
+- vi_srbm_select(adev, 0, 0, 0, 0);
+- mutex_unlock(&adev->srbm_mutex);
+-}
+-
+ static int gfx_v8_0_soft_reset(void *handle)
+ {
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+@@ -4108,7 +5051,6 @@ static int gfx_v8_0_soft_reset(void *handle)
+ SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+- gfx_v8_0_print_status((void *)adev);
+ /* stop the rlc */
+ gfx_v8_0_rlc_stop(adev);
+
+@@ -4168,7 +5110,6 @@ static int gfx_v8_0_soft_reset(void *handle)
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+- gfx_v8_0_print_status((void *)adev);
+ }
+ return 0;
+ }
+@@ -4250,6 +5191,7 @@ static int gfx_v8_0_early_init(void *handle)
+ gfx_v8_0_set_ring_funcs(adev);
+ gfx_v8_0_set_irq_funcs(adev);
+ gfx_v8_0_set_gds_init(adev);
++ gfx_v8_0_set_rlc_funcs(adev);
+
+ return 0;
+ }
+@@ -4272,17 +5214,181 @@ static int gfx_v8_0_late_init(void *handle)
+ if (r)
+ return r;
+
++ amdgpu_set_powergating_state(adev,
++ AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
++
+ return 0;
+ }
+
++static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
++ bool enable)
++{ /* Toggle STATIC_PER_CU_PG_ENABLE in RLC_PG_CNTL; on Polaris11 the SMC block is gated/ungated first. */
++ uint32_t data, temp;
++
++ if (adev->asic_type == CHIP_POLARIS11)
++ /* Send msg to SMU via Powerplay */
++ amdgpu_set_powergating_state(adev,
++ AMD_IP_BLOCK_TYPE_SMC,
++ enable ?
++ AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
++
++ if (enable) {
++ /* Enable static MGPG */
++ temp = data = RREG32(mmRLC_PG_CNTL);
++ data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
++
++ if (temp != data) /* write only when the bit actually changed */
++ WREG32(mmRLC_PG_CNTL, data);
++ } else { /* disable path: clear the same bit */
++ temp = data = RREG32(mmRLC_PG_CNTL);
++ data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
++
++ if (temp != data)
++ WREG32(mmRLC_PG_CNTL, data);
++ }
++}
++
++/*
++ * Turn dynamic per-CU medium-grain power gating on or off by setting or
++ * clearing DYN_PER_CU_PG_ENABLE in RLC_PG_CNTL.  The register is only
++ * written back when the new value differs from the one that was read.
++ */
++static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
++		bool enable)
++{
++	uint32_t orig, val;
++
++	orig = RREG32(mmRLC_PG_CNTL);
++	if (enable)
++		val = orig | RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
++	else
++		val = orig & ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
++
++	if (val != orig)
++		WREG32(mmRLC_PG_CNTL, val);
++}
++
++/*
++ * Turn "quick" power gating on or off by setting or clearing bit
++ * 0x100000 of RLC_PG_CNTL.  The register is only written back when the
++ * new value differs from the one that was read.
++ */
++static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
++		bool enable)
++{
++	/* quick PG enable bit; no named mask is used for it in this file */
++	const uint32_t quick_pg_bit = 0x100000;
++	uint32_t orig, val;
++
++	orig = RREG32(mmRLC_PG_CNTL);
++	if (enable)
++		val = orig | quick_pg_bit;
++	else
++		val = orig & ~quick_pg_bit;
++
++	if (val != orig)
++		WREG32(mmRLC_PG_CNTL, val);
++}
++
++/*
++ * Set or clear GFX_POWER_GATING_ENABLE in RLC_PG_CNTL according to
++ * @enable, touching the register only when the bit actually flips.
++ */
++static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
++		bool enable)
++{
++	u32 before, after;
++
++	before = RREG32(mmRLC_PG_CNTL);
++	after = enable ? (before | RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK) :
++			 (before & ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK);
++
++	if (after != before)
++		WREG32(mmRLC_PG_CNTL, after);
++}
++
++/*
++ * Set or clear GFX_PIPELINE_PG_ENABLE in RLC_PG_CNTL according to
++ * @enable, touching the register only when the bit actually flips.
++ * When disabling, a GFX register is read afterwards to wake up GFX.
++ */
++static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
++		bool enable)
++{
++	u32 before, after;
++
++	before = RREG32(mmRLC_PG_CNTL);
++	after = enable ? (before | RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK) :
++			 (before & ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK);
++
++	if (after != before)
++		WREG32(mmRLC_PG_CNTL, after);
++
++	/* Read any GFX register to wake up GFX; the value itself is unused. */
++	if (!enable)
++		(void)RREG32(mmDB_RENDER_CONTROL);
++}
++
++/*
++ * Apply the requested GFX power gating state.
++ *
++ * Gating is switched on only when @enable is set and pg_flags advertises
++ * AMD_PG_SUPPORT_GFX_PG; pipeline gating additionally requires
++ * AMD_PG_SUPPORT_GFX_PIPELINE.  In every other case both GFX power gating
++ * and pipeline power gating are switched off.
++ */
++static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
++		bool enable)
++{
++	if (!enable || !(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
++		cz_enable_gfx_cg_power_gating(adev, false);
++		cz_enable_gfx_pipeline_power_gating(adev, false);
++		return;
++	}
++
++	cz_enable_gfx_cg_power_gating(adev, true);
++	if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
++		cz_enable_gfx_pipeline_power_gating(adev, true);
++}
++
++/*
++ * IP-block hook: gate or ungate the GFX power domains.
++ *
++ * Does nothing (returns 0) unless pg_flags has AMD_PG_SUPPORT_GFX_PG.
++ * Otherwise each gating feature is turned on only if @state is
++ * AMD_PG_STATE_GATE and the matching AMD_PG_SUPPORT_* flag is set, and is
++ * turned off in every other case.  Always returns 0.
++ */
+ static int gfx_v8_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+ {
++	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++	bool gate = (state == AMD_PG_STATE_GATE);
++	bool smg, dmg, quick;
++
++	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
++		return 0;
++
++	/* a feature is gated only when requested AND supported */
++	smg = gate && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG);
++	dmg = gate && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG);
++	quick = gate && (adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG);
++
++	switch (adev->asic_type) {
++	case CHIP_CARRIZO:
++	case CHIP_STONEY:
++		/* AMD_PG_SUPPORT_GFX_PG is known to be set at this point */
++		cz_update_gfx_cg_power_gating(adev, gate);
++		gfx_v8_0_enable_gfx_static_mg_power_gating(adev, smg);
++		gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, dmg);
++		break;
++	case CHIP_POLARIS11:
++		gfx_v8_0_enable_gfx_static_mg_power_gating(adev, smg);
++		gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, dmg);
++		polaris11_enable_gfx_quick_mg_power_gating(adev, quick);
++		break;
++	default:
++		break;
++	}
++
+ return 0;
+ }
+
+-static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
+- uint32_t reg_addr, uint32_t cmd)
++static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
++ uint32_t reg_addr, uint32_t cmd)
+ {
+ uint32_t data;
+
+@@ -4292,7 +5398,8 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
+ WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+
+ data = RREG32(mmRLC_SERDES_WR_CTRL);
+- data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
++ if (adev->asic_type == CHIP_STONEY)
++ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
+@@ -4300,42 +5407,218 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
+ RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
+ RLC_SERDES_WR_CTRL__POWER_UP_MASK |
+ RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
+- RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
+- RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
+ RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
++ else
++ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
++ RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
++ RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
++ RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
++ RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
++ RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
++ RLC_SERDES_WR_CTRL__POWER_UP_MASK |
++ RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
++ RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
++ RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
++ RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
+ data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
+- (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
+- (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
+- (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
++ (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
++ (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
++ (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
+
+ WREG32(mmRLC_SERDES_WR_CTRL, data);
+ }
+
+-static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+- bool enable)
++#define MSG_ENTER_RLC_SAFE_MODE 1 /* MESSAGE value cz_enter_rlc_safe_mode() writes to RLC_GPR_REG2 */
++#define MSG_EXIT_RLC_SAFE_MODE 0 /* MESSAGE value cz_exit_rlc_safe_mode() writes to RLC_GPR_REG2 */
++
++#define RLC_GPR_REG2__REQ_MASK 0x00000001 /* bit 0: set with each message, then polled until it reads back 0 */
++#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 /* MESSAGE field starts at bit 1 */
++#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e /* MESSAGE field covers bits 4:1 */
++
++/*
++ * Ask the RLC to enter safe mode through RLC_GPR_REG2.
++ *
++ * Returns without doing anything if RLC_CNTL shows the RLC F32 core
++ * disabled, or if none of the GFX CGCG/MGCG clock gating or GFX
++ * PG/SMG/DMG power gating features is flagged as supported.  Otherwise
++ * posts MSG_ENTER_RLC_SAFE_MODE with the REQ bit set, waits (up to
++ * adev->usec_timeout iterations of 1us each) for both the GFX clock and
++ * power status bits in RLC_GPM_STAT, waits the same way for REQ to clear,
++ * and finally records in_safe_mode = true (also after a timeout).
++ *
++ * NOTE(review): the word written to RLC_GPR_REG2 is built on top of the
++ * value read from RLC_CNTL -- confirm this is intended.
++ */
++static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
++{
++	const u32 gfx_ready = RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
++			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK;
++	unsigned usec;
++	u32 req;
++
++	req = RREG32(mmRLC_CNTL);
++	if (!(req & RLC_CNTL__RLC_ENABLE_F32_MASK))
++		return;
++
++	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) &&
++	    !(adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
++				AMD_PG_SUPPORT_GFX_DMG)))
++		return;
++
++	req |= RLC_GPR_REG2__REQ_MASK;
++	req &= ~RLC_GPR_REG2__MESSAGE_MASK;
++	req |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
++	WREG32(mmRLC_GPR_REG2, req);
++
++	/* wait for GFX clock and power status to both be reported */
++	for (usec = 0; usec < adev->usec_timeout; usec++) {
++		if ((RREG32(mmRLC_GPM_STAT) & gfx_ready) == gfx_ready)
++			break;
++		udelay(1);
++	}
++
++	/* wait for the request bit to read back as zero */
++	for (usec = 0; usec < adev->usec_timeout; usec++) {
++		if (!(RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK))
++			break;
++		udelay(1);
++	}
++
++	adev->gfx.rlc.in_safe_mode = true;
++}
++
++/*
++ * Ask the RLC to leave safe mode through RLC_GPR_REG2.
++ *
++ * Returns without doing anything if RLC_CNTL shows the RLC F32 core
++ * disabled.  If any of the GFX CGCG/MGCG clock gating or GFX PG/SMG/DMG
++ * power gating features is flagged as supported, posts
++ * MSG_EXIT_RLC_SAFE_MODE with the REQ bit set and clears in_safe_mode.
++ * In either case it then polls (up to adev->usec_timeout iterations of
++ * 1us each) until the REQ bit reads back as zero.
++ *
++ * NOTE(review): the message is posted regardless of the current
++ * in_safe_mode value, and the word written is built on top of the value
++ * read from RLC_CNTL -- confirm both are intended.
++ */
++static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
++{
++	unsigned usec;
++	bool gating;
++	u32 req;
++
++	req = RREG32(mmRLC_CNTL);
++	if (!(req & RLC_CNTL__RLC_ENABLE_F32_MASK))
++		return;
++
++	gating = (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
++		 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
++				    AMD_PG_SUPPORT_GFX_DMG));
++	if (gating) {
++		req |= RLC_GPR_REG2__REQ_MASK;
++		req &= ~RLC_GPR_REG2__MESSAGE_MASK;
++		req |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
++		WREG32(mmRLC_GPR_REG2, req);
++		adev->gfx.rlc.in_safe_mode = false;
++	}
++
++	/* wait for the request bit to read back as zero */
++	for (usec = 0; usec < adev->usec_timeout; usec++) {
++		if (!(RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK))
++			break;
++		udelay(1);
++	}
++}
++
++/*
++ * Ask the RLC to enter safe mode through the RLC_SAFE_MODE register.
++ *
++ * Returns without doing anything if RLC_CNTL shows the RLC F32 core
++ * disabled, or if neither GFX CGCG nor MGCG clock gating is flagged as
++ * supported.  Otherwise writes CMD with MESSAGE = 1, waits (up to
++ * adev->usec_timeout iterations of 1us each) for both the GFX clock and
++ * power status bits in RLC_GPM_STAT, waits the same way for CMD to clear,
++ * and finally records in_safe_mode = true (also after a timeout).
++ *
++ * NOTE(review): the word written to RLC_SAFE_MODE is built on top of the
++ * value read from RLC_CNTL -- confirm this is intended.
++ */
++static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
++{
++	const u32 gfx_ready = RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
++			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK;
++	unsigned usec;
++	u32 cmd;
++
++	cmd = RREG32(mmRLC_CNTL);
++	if ((cmd & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
++		return;
++
++	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)))
++		return;
++
++	cmd |= RLC_SAFE_MODE__CMD_MASK;
++	cmd &= ~RLC_SAFE_MODE__MESSAGE_MASK;
++	cmd |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
++	WREG32(mmRLC_SAFE_MODE, cmd);
++
++	/* wait for GFX clock and power status to both be reported */
++	for (usec = 0; usec < adev->usec_timeout; usec++) {
++		if ((RREG32(mmRLC_GPM_STAT) & gfx_ready) == gfx_ready)
++			break;
++		udelay(1);
++	}
++
++	/* wait for the command bit to read back as zero */
++	for (usec = 0; usec < adev->usec_timeout; usec++) {
++		if (!(RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK))
++			break;
++		udelay(1);
++	}
++
++	adev->gfx.rlc.in_safe_mode = true;
++}
++
++/*
++ * Ask the RLC to leave safe mode through the RLC_SAFE_MODE register.
++ *
++ * Returns without doing anything if RLC_CNTL shows the RLC F32 core
++ * disabled.  If GFX CGCG or MGCG clock gating is flagged as supported and
++ * in_safe_mode is currently set, writes CMD with the MESSAGE field
++ * cleared and resets in_safe_mode.  In either case it then polls (up to
++ * adev->usec_timeout iterations of 1us each) until the CMD bit reads back
++ * as zero.
++ *
++ * NOTE(review): the word written to RLC_SAFE_MODE is built on top of the
++ * value read from RLC_CNTL -- confirm this is intended.
++ */
++static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
++{
++	unsigned usec;
++	u32 cmd;
++
++	cmd = RREG32(mmRLC_CNTL);
++	if ((cmd & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
++		return;
++
++	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) &&
++	    adev->gfx.rlc.in_safe_mode) {
++		cmd |= RLC_SAFE_MODE__CMD_MASK;
++		cmd &= ~RLC_SAFE_MODE__MESSAGE_MASK;
++		WREG32(mmRLC_SAFE_MODE, cmd);
++		adev->gfx.rlc.in_safe_mode = false;
++	}
++
++	/* wait for the command bit to read back as zero */
++	for (usec = 0; usec < adev->usec_timeout; usec++) {
++		if (!(RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK))
++			break;
++		udelay(1);
++	}
++}
++
++static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
++{
++ adev->gfx.rlc.in_safe_mode = true; /* bookkeeping only: no RLC register is accessed here */
++}
++
++static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
++{
++ adev->gfx.rlc.in_safe_mode = false; /* bookkeeping only: no RLC register is accessed here */
++}
++
++static const struct amdgpu_rlc_funcs cz_rlc_funcs = { /* safe-mode hooks installed for CHIP_CARRIZO by gfx_v8_0_set_rlc_funcs() */
++ .enter_safe_mode = cz_enter_rlc_safe_mode,
++ .exit_safe_mode = cz_exit_rlc_safe_mode
++};
++
++static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { /* safe-mode hooks installed for CHIP_TOPAZ and CHIP_STONEY by gfx_v8_0_set_rlc_funcs() */
++ .enter_safe_mode = iceland_enter_rlc_safe_mode,
++ .exit_safe_mode = iceland_exit_rlc_safe_mode
++};
++
++static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { /* fallback hooks for every other ASIC: they only track in_safe_mode */
++ .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
++ .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
++};
++
++static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
++ bool enable)
+ {
+ uint32_t temp, data;
+
++ adev->gfx.rlc.funcs->enter_safe_mode(adev);
++
+ /* It is disabled by HW by default */
+- if (enable) {
+- /* 1 - RLC memory Light sleep */
+- temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
+- data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+- if (temp != data)
+- WREG32(mmRLC_MEM_SLP_CNTL, data);
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
++ /* 1 - RLC memory Light sleep */
++ temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
++ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
++ if (temp != data)
++ WREG32(mmRLC_MEM_SLP_CNTL, data);
++ }
+
+- /* 2 - CP memory Light sleep */
+- temp = data = RREG32(mmCP_MEM_SLP_CNTL);
+- data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+- if (temp != data)
+- WREG32(mmCP_MEM_SLP_CNTL, data);
++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
++ /* 2 - CP memory Light sleep */
++ temp = data = RREG32(mmCP_MEM_SLP_CNTL);
++ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
++ if (temp != data)
++ WREG32(mmCP_MEM_SLP_CNTL, data);
++ }
++ }
+
+ /* 3 - RLC_CGTT_MGCG_OVERRIDE */
+ temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+- data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
+- RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
+- RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
+- RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
++ if (adev->flags & AMD_IS_APU)
++ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
++ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
++ RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
++ else
++ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
++ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
++ RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
++ RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
+
+ if (temp != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+@@ -4344,19 +5627,23 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 5 - clear mgcg override */
+- fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
+-
+- /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
+- temp = data = RREG32(mmCGTS_SM_CTRL_REG);
+- data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
+- data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
+- data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
+- data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
+- data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
+- data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
+- data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
+- if (temp != data)
+- WREG32(mmCGTS_SM_CTRL_REG, data);
++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
++
++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
++ /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
++ temp = data = RREG32(mmCGTS_SM_CTRL_REG);
++ data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
++ data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
++ data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
++ data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
++ if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
++ (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
++ data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
++ data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
++ data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
++ if (temp != data)
++ WREG32(mmCGTS_SM_CTRL_REG, data);
++ }
+ udelay(50);
+
+ /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+@@ -4396,23 +5683,27 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 6 - set mgcg override */
+- fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
+
+ udelay(50);
+
+ /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+ }
++
++ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+ }
+
+-static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+- bool enable)
++static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
++ bool enable)
+ {
+ uint32_t temp, temp1, data, data1;
+
+ temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
+
+- if (enable) {
++ adev->gfx.rlc.funcs->enter_safe_mode(adev);
++
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
+ * Cmp_busy/GFX_Idle interrupts
+ */
+@@ -4427,25 +5718,29 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 3 - clear cgcg override */
+- fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
+
+ /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 4 - write cmd to set CGLS */
+- fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
+
+ /* 5 - enable cgcg */
+ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+- /* enable cgls*/
+- data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
++ /* enable cgls*/
++ data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+- temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+- data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
++ temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
++ data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
+
+- if (temp1 != data1)
+- WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
++ if (temp1 != data1)
++ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
++ } else {
++ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
++ }
+
+ if (temp != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+@@ -4470,36 +5765,38 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* write cmd to Set CGCG Overrride */
+- fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
+
+ /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* write cmd to Clear CGLS */
+- fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
+
+ /* disable cgcg, cgls should be disabled too. */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
+- RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
++ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ if (temp != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+ }
++
++ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+ }
+-static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
+- bool enable)
++static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
++ bool enable)
+ {
+ if (enable) {
+ /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
+ * === MGCG + MGLS + TS(CG/LS) ===
+ */
+- fiji_update_medium_grain_clock_gating(adev, enable);
+- fiji_update_coarse_grain_clock_gating(adev, enable);
++ gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
++ gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
+ * === CGCG + CGLS ===
+ */
+- fiji_update_coarse_grain_clock_gating(adev, enable);
+- fiji_update_medium_grain_clock_gating(adev, enable);
++ gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
++ gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
+ }
+ return 0;
+ }
+@@ -4511,8 +5808,10 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+- fiji_update_gfx_clock_gating(adev,
+- state == AMD_CG_STATE_GATE ? true : false);
++ case CHIP_CARRIZO:
++ case CHIP_STONEY:
++ gfx_v8_0_update_gfx_clock_gating(adev,
++ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+@@ -4602,17 +5901,13 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+ }
+
+ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib)
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+- bool need_ctx_switch = ring->current_ctx != ib->ctx;
+ u32 header, control = 0;
+ u32 next_rptr = ring->wptr + 5;
+
+- /* drop the CE preamble IB for the same context */
+- if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
+- return;
+-
+- if (need_ctx_switch)
++ if (ctx_switch)
+ next_rptr += 2;
+
+ next_rptr += 4;
+@@ -4623,7 +5918,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, next_rptr);
+
+ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
+- if (need_ctx_switch) {
++ if (ctx_switch) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+@@ -4633,7 +5928,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+- control |= ib->length_dw | (ib->vm_id << 24);
++ control |= ib->length_dw | (vm_id << 24);
+
+ amdgpu_ring_write(ring, header);
+ amdgpu_ring_write(ring,
+@@ -4646,7 +5941,8 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ }
+
+ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib)
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+ u32 header, control = 0;
+ u32 next_rptr = ring->wptr + 5;
+@@ -4662,7 +5958,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+- control |= ib->length_dw | (ib->vm_id << 24);
++ control |= ib->length_dw | (vm_id << 24);
+
+ amdgpu_ring_write(ring, header);
+ amdgpu_ring_write(ring,
+@@ -4684,6 +5980,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
++ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+@@ -5022,6 +6319,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
+ }
+
+ const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
++ .name = "gfx_v8_0",
+ .early_init = gfx_v8_0_early_init,
+ .late_init = gfx_v8_0_late_init,
+ .sw_init = gfx_v8_0_sw_init,
+@@ -5033,7 +6331,6 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
+ .is_idle = gfx_v8_0_is_idle,
+ .wait_for_idle = gfx_v8_0_wait_for_idle,
+ .soft_reset = gfx_v8_0_soft_reset,
+- .print_status = gfx_v8_0_print_status,
+ .set_clockgating_state = gfx_v8_0_set_clockgating_state,
+ .set_powergating_state = gfx_v8_0_set_powergating_state,
+ };
+@@ -5112,6 +6409,22 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
+ adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
+ }
+
++/*
++ * Pick the RLC safe-mode callbacks for this ASIC: the iceland variant for
++ * CHIP_TOPAZ and CHIP_STONEY, the cz variant for CHIP_CARRIZO, and the
++ * no-op variant (state tracking only) for everything else.
++ */
++static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
++{
++	const struct amdgpu_rlc_funcs *funcs;
++
++	if (adev->asic_type == CHIP_TOPAZ || adev->asic_type == CHIP_STONEY)
++		funcs = &iceland_rlc_funcs;
++	else if (adev->asic_type == CHIP_CARRIZO)
++		funcs = &cz_rlc_funcs;
++	else
++		funcs = &gfx_v8_0_nop_rlc_funcs;
++
++	adev->gfx.rlc.funcs = funcs;
++}
++
++
+ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
+ {
+ /* init asci gds info */
+@@ -5155,14 +6468,11 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+ return (~data) & mask;
+ }
+
+-int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
+- struct amdgpu_cu_info *cu_info)
++static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
+ {
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+-
+- if (!adev || !cu_info)
+- return -EINVAL;
++ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+
+@@ -5193,6 +6503,4 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+-
+- return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+index 021e051..16a49f5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+@@ -28,6 +28,5 @@ extern const struct amd_ip_funcs gfx_v8_0_ip_funcs;
+
+ uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
+-int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info);
+
+ #endif
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+index 29bd7b5..9945d5b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+@@ -43,6 +43,8 @@ static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
+ static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+
+ MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
+
+ static const u32 golden_settings_tonga_a11[] =
+ {
+@@ -73,6 +75,23 @@ static const u32 fiji_mgcg_cgcg_init[] =
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+ };
+
++static const u32 golden_settings_polaris11_a11[] =
++{ /* passed to amdgpu_program_register_sequence() for CHIP_POLARIS11; NOTE(review): rows look like {register, mask, value} triplets -- confirm */
++ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
++ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
++ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
++ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
++};
++
++static const u32 golden_settings_polaris10_a11[] =
++{ /* passed to amdgpu_program_register_sequence() for CHIP_POLARIS10; NOTE(review): rows look like {register, mask, value} triplets -- confirm */
++ mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
++ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
++ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
++ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
++ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
++};
++
+ static const u32 cz_mgcg_cgcg_init[] =
+ {
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+@@ -103,6 +122,16 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
+ golden_settings_tonga_a11,
+ (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
+ break;
++ case CHIP_POLARIS11:
++ amdgpu_program_register_sequence(adev,
++ golden_settings_polaris11_a11,
++ (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
++ break;
++ case CHIP_POLARIS10:
++ amdgpu_program_register_sequence(adev,
++ golden_settings_polaris10_a11,
++ (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
++ break;
+ case CHIP_CARRIZO:
+ amdgpu_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+@@ -209,6 +238,12 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
+ case CHIP_TONGA:
+ chip_name = "tonga";
+ break;
++ case CHIP_POLARIS11:
++ chip_name = "polaris11";
++ break;
++ case CHIP_POLARIS10:
++ chip_name = "polaris10";
++ break;
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+@@ -863,14 +898,6 @@ static int gmc_v8_0_early_init(void *handle)
+ gmc_v8_0_set_gart_funcs(adev);
+ gmc_v8_0_set_irq_funcs(adev);
+
+- if (adev->flags & AMD_IS_APU) {
+- adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+- } else {
+- u32 tmp = RREG32(mmMC_SEQ_MISC0);
+- tmp &= MC_SEQ_MISC0__MT__MASK;
+- adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+- }
+-
+ return 0;
+ }
+
+@@ -878,15 +905,33 @@ static int gmc_v8_0_late_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
++ if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
++ return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
++ else
++ return 0;
+ }
+
++#define mmMC_SEQ_MISC0_FIJI 0xA71 /* read instead of mmMC_SEQ_MISC0 on CHIP_FIJI when gmc_v8_0_sw_init() determines the VRAM type */
++
+ static int gmc_v8_0_sw_init(void *handle)
+ {
+ int r;
+ int dma_bits;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ if (adev->flags & AMD_IS_APU) {
++ adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
++ } else {
++ u32 tmp;
++
++ if (adev->asic_type == CHIP_FIJI)
++ tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
++ else
++ tmp = RREG32(mmMC_SEQ_MISC0);
++ tmp &= MC_SEQ_MISC0__MT__MASK;
++ adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
++ }
++
+ r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
+ if (r)
+ return r;
+@@ -1075,111 +1120,6 @@ static int gmc_v8_0_wait_for_idle(void *handle)
+
+ }
+
+-static void gmc_v8_0_print_status(void *handle)
+-{
+- int i, j;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+- dev_info(adev->dev, "GMC 8.x registers\n");
+- dev_info(adev->dev, " SRBM_STATUS=0x%08X\n",
+- RREG32(mmSRBM_STATUS));
+- dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
+- RREG32(mmSRBM_STATUS2));
+-
+- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
+- dev_info(adev->dev, " MC_VM_MX_L1_TLB_CNTL=0x%08X\n",
+- RREG32(mmMC_VM_MX_L1_TLB_CNTL));
+- dev_info(adev->dev, " VM_L2_CNTL=0x%08X\n",
+- RREG32(mmVM_L2_CNTL));
+- dev_info(adev->dev, " VM_L2_CNTL2=0x%08X\n",
+- RREG32(mmVM_L2_CNTL2));
+- dev_info(adev->dev, " VM_L2_CNTL3=0x%08X\n",
+- RREG32(mmVM_L2_CNTL3));
+- dev_info(adev->dev, " VM_L2_CNTL4=0x%08X\n",
+- RREG32(mmVM_L2_CNTL4));
+- dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_START_ADDR=0x%08X\n",
+- RREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_END_ADDR=0x%08X\n",
+- RREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n",
+- RREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT0_CNTL2=0x%08X\n",
+- RREG32(mmVM_CONTEXT0_CNTL2));
+- dev_info(adev->dev, " VM_CONTEXT0_CNTL=0x%08X\n",
+- RREG32(mmVM_CONTEXT0_CNTL));
+- dev_info(adev->dev, " VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR=0x%08X\n",
+- RREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR));
+- dev_info(adev->dev, " VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR=0x%08X\n",
+- RREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR));
+- dev_info(adev->dev, " mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET=0x%08X\n",
+- RREG32(mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET));
+- dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_START_ADDR=0x%08X\n",
+- RREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_END_ADDR=0x%08X\n",
+- RREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n",
+- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR));
+- dev_info(adev->dev, " VM_CONTEXT1_CNTL2=0x%08X\n",
+- RREG32(mmVM_CONTEXT1_CNTL2));
+- dev_info(adev->dev, " VM_CONTEXT1_CNTL=0x%08X\n",
+- RREG32(mmVM_CONTEXT1_CNTL));
+- for (i = 0; i < 16; i++) {
+- if (i < 8)
+- dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n",
+- i, RREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i));
+- else
+- dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n",
+- i, RREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8));
+- }
+- dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_LOW_ADDR=0x%08X\n",
+- RREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR));
+- dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_HIGH_ADDR=0x%08X\n",
+- RREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR));
+- dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR=0x%08X\n",
+- RREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR));
+- dev_info(adev->dev, " MC_VM_FB_LOCATION=0x%08X\n",
+- RREG32(mmMC_VM_FB_LOCATION));
+- dev_info(adev->dev, " MC_VM_AGP_BASE=0x%08X\n",
+- RREG32(mmMC_VM_AGP_BASE));
+- dev_info(adev->dev, " MC_VM_AGP_TOP=0x%08X\n",
+- RREG32(mmMC_VM_AGP_TOP));
+- dev_info(adev->dev, " MC_VM_AGP_BOT=0x%08X\n",
+- RREG32(mmMC_VM_AGP_BOT));
+-
+- dev_info(adev->dev, " HDP_REG_COHERENCY_FLUSH_CNTL=0x%08X\n",
+- RREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL));
+- dev_info(adev->dev, " HDP_NONSURFACE_BASE=0x%08X\n",
+- RREG32(mmHDP_NONSURFACE_BASE));
+- dev_info(adev->dev, " HDP_NONSURFACE_INFO=0x%08X\n",
+- RREG32(mmHDP_NONSURFACE_INFO));
+- dev_info(adev->dev, " HDP_NONSURFACE_SIZE=0x%08X\n",
+- RREG32(mmHDP_NONSURFACE_SIZE));
+- dev_info(adev->dev, " HDP_MISC_CNTL=0x%08X\n",
+- RREG32(mmHDP_MISC_CNTL));
+- dev_info(adev->dev, " HDP_HOST_PATH_CNTL=0x%08X\n",
+- RREG32(mmHDP_HOST_PATH_CNTL));
+-
+- for (i = 0, j = 0; i < 32; i++, j += 0x6) {
+- dev_info(adev->dev, " %d:\n", i);
+- dev_info(adev->dev, " 0x%04X=0x%08X\n",
+- 0xb05 + j, RREG32(0xb05 + j));
+- dev_info(adev->dev, " 0x%04X=0x%08X\n",
+- 0xb06 + j, RREG32(0xb06 + j));
+- dev_info(adev->dev, " 0x%04X=0x%08X\n",
+- 0xb07 + j, RREG32(0xb07 + j));
+- dev_info(adev->dev, " 0x%04X=0x%08X\n",
+- 0xb08 + j, RREG32(0xb08 + j));
+- dev_info(adev->dev, " 0x%04X=0x%08X\n",
+- 0xb09 + j, RREG32(0xb09 + j));
+- }
+-
+- dev_info(adev->dev, " BIF_FB_EN=0x%08X\n",
+- RREG32(mmBIF_FB_EN));
+-}
+-
+ static int gmc_v8_0_soft_reset(void *handle)
+ {
+ struct amdgpu_mode_mc_save save;
+@@ -1199,8 +1139,6 @@ static int gmc_v8_0_soft_reset(void *handle)
+ }
+
+ if (srbm_soft_reset) {
+- gmc_v8_0_print_status((void *)adev);
+-
+ gmc_v8_0_mc_stop(adev, &save);
+ if (gmc_v8_0_wait_for_idle(adev)) {
+ dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
+@@ -1224,8 +1162,6 @@ static int gmc_v8_0_soft_reset(void *handle)
+
+ gmc_v8_0_mc_resume(adev, &save);
+ udelay(50);
+-
+- gmc_v8_0_print_status((void *)adev);
+ }
+
+ return 0;
+@@ -1303,11 +1239,11 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
+ }
+
+ static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev,
+- bool enable)
++ bool enable)
+ {
+ uint32_t data;
+
+- if (enable) {
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ data |= MC_HUB_MISC_HUB_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_HUB_CG, data);
+@@ -1383,11 +1319,11 @@ static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev,
+ }
+
+ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
+- bool enable)
++ bool enable)
+ {
+ uint32_t data;
+
+- if (enable) {
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) {
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ data |= MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_HUB_CG, data);
+@@ -1487,6 +1423,7 @@ static int gmc_v8_0_set_powergating_state(void *handle,
+ }
+
+ const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
++ .name = "gmc_v8_0",
+ .early_init = gmc_v8_0_early_init,
+ .late_init = gmc_v8_0_late_init,
+ .sw_init = gmc_v8_0_sw_init,
+@@ -1498,7 +1435,6 @@ const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
+ .is_idle = gmc_v8_0_is_idle,
+ .wait_for_idle = gmc_v8_0_wait_for_idle,
+ .soft_reset = gmc_v8_0_soft_reset,
+- .print_status = gmc_v8_0_print_status,
+ .set_clockgating_state = gmc_v8_0_set_clockgating_state,
+ .set_powergating_state = gmc_v8_0_set_powergating_state,
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+index 6e0a86a..3c09164 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+@@ -242,9 +242,10 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+ * Schedule an IB in the DMA ring (VI).
+ */
+ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib)
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+- u32 vmid = ib->vm_id & 0xf;
++ u32 vmid = vm_id & 0xf;
+ u32 next_rptr = ring->wptr + 5;
+
+ while ((next_rptr & 7) != 2)
+@@ -701,7 +702,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
+ ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
+ if (r)
+ goto err1;
+
+@@ -990,7 +991,7 @@ static int sdma_v2_4_sw_init(void *handle)
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ sprintf(ring->name, "sdma%d", i);
+- r = amdgpu_ring_init(adev, ring, 256 * 1024,
++ r = amdgpu_ring_init(adev, ring, 1024,
+ SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
+ &adev->sdma.trap_irq,
+ (i == 0) ?
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+index 833d265..31d99b00 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+@@ -56,6 +56,11 @@ MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin");
+ MODULE_FIRMWARE("amdgpu/fiji_sdma.bin");
+ MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin");
+ MODULE_FIRMWARE("amdgpu/stoney_sdma.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_sdma.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_sdma1.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
++
+
+ static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
+ {
+@@ -101,6 +106,34 @@ static const u32 fiji_mgcg_cgcg_init[] =
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
+ };
+
++static const u32 golden_settings_polaris11_a11[] =
++{
++ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
++ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
++ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
++ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
++ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
++};
++
++static const u32 golden_settings_polaris10_a11[] =
++{
++ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
++ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
++ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
++ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
++ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
++ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
++};
++
+ static const u32 cz_golden_settings_a11[] =
+ {
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+@@ -172,6 +205,16 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
+ golden_settings_tonga_a11,
+ (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
+ break;
++ case CHIP_POLARIS11:
++ amdgpu_program_register_sequence(adev,
++ golden_settings_polaris11_a11,
++ (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
++ break;
++ case CHIP_POLARIS10:
++ amdgpu_program_register_sequence(adev,
++ golden_settings_polaris10_a11,
++ (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
++ break;
+ case CHIP_CARRIZO:
+ amdgpu_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+@@ -220,6 +263,12 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
+ case CHIP_FIJI:
+ chip_name = "fiji";
+ break;
++ case CHIP_POLARIS11:
++ chip_name = "polaris11";
++ break;
++ case CHIP_POLARIS10:
++ chip_name = "polaris10";
++ break;
+ case CHIP_CARRIZO:
+ chip_name = "carrizo";
+ break;
+@@ -353,9 +402,10 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+ * Schedule an IB in the DMA ring (VI).
+ */
+ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib)
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+- u32 vmid = ib->vm_id & 0xf;
++ u32 vmid = vm_id & 0xf;
+ u32 next_rptr = ring->wptr + 5;
+
+ while ((next_rptr & 7) != 2)
+@@ -878,7 +928,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
+ if (r)
+ goto err1;
+
+@@ -1176,7 +1226,7 @@ static int sdma_v3_0_sw_init(void *handle)
+ AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
+
+ sprintf(ring->name, "sdma%d", i);
+- r = amdgpu_ring_init(adev, ring, 256 * 1024,
++ r = amdgpu_ring_init(adev, ring, 1024,
+ SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
+ &adev->sdma.trap_irq,
+ (i == 0) ?
+@@ -1267,57 +1317,6 @@ static int sdma_v3_0_wait_for_idle(void *handle)
+ return -ETIMEDOUT;
+ }
+
+-static void sdma_v3_0_print_status(void *handle)
+-{
+- int i, j;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+- dev_info(adev->dev, "VI SDMA registers\n");
+- dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
+- RREG32(mmSRBM_STATUS2));
+- for (i = 0; i < adev->sdma.num_instances; i++) {
+- dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n",
+- i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n",
+- i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_CNTL=0x%08X\n",
+- i, RREG32(mmSDMA0_CNTL + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n",
+- i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_IB_CNTL=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_CNTL=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_WPTR=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_BASE=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
+- i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
+- mutex_lock(&adev->srbm_mutex);
+- for (j = 0; j < 16; j++) {
+- vi_srbm_select(adev, 0, 0, 0, j);
+- dev_info(adev->dev, " VM %d:\n", j);
+- dev_info(adev->dev, " SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i]));
+- dev_info(adev->dev, " SDMA%d_GFX_APE1_CNTL=0x%08X\n",
+- i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i]));
+- }
+- vi_srbm_select(adev, 0, 0, 0, 0);
+- mutex_unlock(&adev->srbm_mutex);
+- }
+-}
+-
+ static int sdma_v3_0_soft_reset(void *handle)
+ {
+ u32 srbm_soft_reset = 0;
+@@ -1340,8 +1339,6 @@ static int sdma_v3_0_soft_reset(void *handle)
+ }
+
+ if (srbm_soft_reset) {
+- sdma_v3_0_print_status((void *)adev);
+-
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+@@ -1356,8 +1353,6 @@ static int sdma_v3_0_soft_reset(void *handle)
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+-
+- sdma_v3_0_print_status((void *)adev);
+ }
+
+ return 0;
+@@ -1458,40 +1453,31 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ return 0;
+ }
+
+-static void fiji_update_sdma_medium_grain_clock_gating(
++static void sdma_v3_0_update_sdma_medium_grain_clock_gating(
+ struct amdgpu_device *adev,
+ bool enable)
+ {
+ uint32_t temp, data;
++ int i;
+
+- if (enable) {
+- temp = data = RREG32(mmSDMA0_CLK_CTRL);
+- data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+- if (data != temp)
+- WREG32(mmSDMA0_CLK_CTRL, data);
+-
+- temp = data = RREG32(mmSDMA1_CLK_CTRL);
+- data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+-
+- if (data != temp)
+- WREG32(mmSDMA1_CLK_CTRL, data);
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
++ for (i = 0; i < adev->sdma.num_instances; i++) {
++ temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]);
++ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
++ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
++ if (data != temp)
++ WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data);
++ }
+ } else {
+- temp = data = RREG32(mmSDMA0_CLK_CTRL);
+- data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
++ for (i = 0; i < adev->sdma.num_instances; i++) {
++ temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]);
++ data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+@@ -1500,54 +1486,35 @@ static void fiji_update_sdma_medium_grain_clock_gating(
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK;
+
+- if (data != temp)
+- WREG32(mmSDMA0_CLK_CTRL, data);
+-
+- temp = data = RREG32(mmSDMA1_CLK_CTRL);
+- data |= SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK;
+-
+- if (data != temp)
+- WREG32(mmSDMA1_CLK_CTRL, data);
++ if (data != temp)
++ WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data);
++ }
+ }
+ }
+
+-static void fiji_update_sdma_medium_grain_light_sleep(
++static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
+ struct amdgpu_device *adev,
+ bool enable)
+ {
+ uint32_t temp, data;
++ int i;
+
+- if (enable) {
+- temp = data = RREG32(mmSDMA0_POWER_CNTL);
+- data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+-
+- if (temp != data)
+- WREG32(mmSDMA0_POWER_CNTL, data);
+-
+- temp = data = RREG32(mmSDMA1_POWER_CNTL);
+- data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
++ for (i = 0; i < adev->sdma.num_instances; i++) {
++ temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]);
++ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+
+- if (temp != data)
+- WREG32(mmSDMA1_POWER_CNTL, data);
++ if (temp != data)
++ WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data);
++ }
+ } else {
+- temp = data = RREG32(mmSDMA0_POWER_CNTL);
+- data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+-
+- if (temp != data)
+- WREG32(mmSDMA0_POWER_CNTL, data);
+-
+- temp = data = RREG32(mmSDMA1_POWER_CNTL);
+- data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
++ for (i = 0; i < adev->sdma.num_instances; i++) {
++ temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]);
++ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+
+- if (temp != data)
+- WREG32(mmSDMA1_POWER_CNTL, data);
++ if (temp != data)
++ WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data);
++ }
+ }
+ }
+
+@@ -1558,9 +1525,11 @@ static int sdma_v3_0_set_clockgating_state(void *handle,
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+- fiji_update_sdma_medium_grain_clock_gating(adev,
++ case CHIP_CARRIZO:
++ case CHIP_STONEY:
++ sdma_v3_0_update_sdma_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+- fiji_update_sdma_medium_grain_light_sleep(adev,
++ sdma_v3_0_update_sdma_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+@@ -1576,6 +1545,7 @@ static int sdma_v3_0_set_powergating_state(void *handle,
+ }
+
+ const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
++ .name = "sdma_v3_0",
+ .early_init = sdma_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v3_0_sw_init,
+@@ -1587,7 +1557,6 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
+ .is_idle = sdma_v3_0_is_idle,
+ .wait_for_idle = sdma_v3_0_wait_for_idle,
+ .soft_reset = sdma_v3_0_soft_reset,
+- .print_status = sdma_v3_0_print_status,
+ .set_clockgating_state = sdma_v3_0_set_clockgating_state,
+ .set_powergating_state = sdma_v3_0_set_powergating_state,
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h b/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h
+index c24a81e..880152c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h
++++ b/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h
+@@ -44,6 +44,7 @@
+ #define UCODE_ID_IH_REG_RESTORE 11
+ #define UCODE_ID_VBIOS 12
+ #define UCODE_ID_MISC_METADATA 13
++#define UCODE_ID_SMU_SK 14
+ #define UCODE_ID_RLC_SCRATCH 32
+ #define UCODE_ID_RLC_SRM_ARAM 33
+ #define UCODE_ID_RLC_SRM_DRAM 34
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+index 3cc301d..20b61d7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+@@ -538,7 +538,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
+ * Write ring commands to execute the indirect buffer
+ */
+ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib)
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+index b90b0ff..4befb62 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+@@ -779,7 +779,8 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
+ * Write ring commands to execute the indirect buffer
+ */
+ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+- struct amdgpu_ib *ib)
++ struct amdgpu_ib *ib,
++ unsigned vm_id, bool ctx_switch)
+ {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
+index 328707c..0c0c4d1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vi.c
++++ b/drivers/gpu/drm/amd/amdgpu/vi.c
+@@ -79,6 +79,11 @@
+ #include "amdgpu_dm.h"
+ #include "amdgpu_powerplay.h"
+
++MODULE_FIRMWARE("amdgpu/polaris10_smc.bin");
++MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_smc.bin");
++MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin");
++
+ /*
+ * Indirect registers accessor
+ */
+@@ -277,6 +282,8 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
+ stoney_mgcg_cgcg_init,
+ (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
+ break;
++ case CHIP_POLARIS11:
++ case CHIP_POLARIS10:
+ default:
+ break;
+ }
+@@ -538,6 +545,8 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
+ break;
+ case CHIP_FIJI:
+ case CHIP_TONGA:
++ case CHIP_POLARIS11:
++ case CHIP_POLARIS10:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ asic_register_table = cz_allowed_read_registers;
+@@ -908,6 +917,74 @@ static const struct amdgpu_ip_block_version fiji_ip_blocks[] =
+ },
+ };
+
++static const struct amdgpu_ip_block_version polaris11_ip_blocks[] =
++{
++ /* ORDER MATTERS! */
++ {
++ .type = AMD_IP_BLOCK_TYPE_COMMON,
++ .major = 2,
++ .minor = 0,
++ .rev = 0,
++ .funcs = &vi_common_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_GMC,
++ .major = 8,
++ .minor = 1,
++ .rev = 0,
++ .funcs = &gmc_v8_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_IH,
++ .major = 3,
++ .minor = 1,
++ .rev = 0,
++ .funcs = &tonga_ih_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_SMC,
++ .major = 7,
++ .minor = 2,
++ .rev = 0,
++ .funcs = &amdgpu_pp_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_DCE,
++ .major = 11,
++ .minor = 2,
++ .rev = 0,
++ .funcs = &dce_v11_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_GFX,
++ .major = 8,
++ .minor = 0,
++ .rev = 0,
++ .funcs = &gfx_v8_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_SDMA,
++ .major = 3,
++ .minor = 1,
++ .rev = 0,
++ .funcs = &sdma_v3_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_UVD,
++ .major = 6,
++ .minor = 3,
++ .rev = 0,
++ .funcs = &uvd_v6_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_VCE,
++ .major = 3,
++ .minor = 4,
++ .rev = 0,
++ .funcs = &vce_v3_0_ip_funcs,
++ },
++};
++
+ static const struct amdgpu_ip_block_version cz_ip_blocks[] =
+ {
+ /* ORDER MATTERS! */
+@@ -1067,6 +1144,75 @@ static const struct amdgpu_ip_block_version cz_ip_blocks_dal[] =
+ #endif
+ };
+
++static const struct amdgpu_ip_block_version polaris11_ip_blocks_dal[] =
++{
++ /* ORDER MATTERS! */
++ {
++ .type = AMD_IP_BLOCK_TYPE_COMMON,
++ .major = 2,
++ .minor = 0,
++ .rev = 0,
++ .funcs = &vi_common_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_GMC,
++ .major = 8,
++ .minor = 1,
++ .rev = 0,
++ .funcs = &gmc_v8_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_IH,
++ .major = 3,
++ .minor = 1,
++ .rev = 0,
++ .funcs = &tonga_ih_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_SMC,
++ .major = 7,
++ .minor = 2,
++ .rev = 0,
++ /* To Do */
++ .funcs = &amdgpu_pp_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_DCE,
++ .major = 11,
++ .minor = 2,
++ .rev = 0,
++ .funcs = &amdgpu_dm_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_GFX,
++ .major = 8,
++ .minor = 0,
++ .rev = 0,
++ .funcs = &gfx_v8_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_SDMA,
++ .major = 3,
++ .minor = 1,
++ .rev = 0,
++ .funcs = &sdma_v3_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_UVD,
++ .major = 6,
++ .minor = 3,
++ .rev = 0,
++ .funcs = &uvd_v6_0_ip_funcs,
++ },
++ {
++ .type = AMD_IP_BLOCK_TYPE_VCE,
++ .major = 3,
++ .minor = 4,
++ .rev = 0,
++ .funcs = &vce_v3_0_ip_funcs,
++ },
++};
++
+ static const struct amdgpu_ip_block_version tonga_ip_blocks_dal[] =
+ {
+ /* ORDER MATTERS! */
+@@ -1213,7 +1359,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
+ break;
+ case CHIP_FIJI:
+ #if defined(CONFIG_DRM_AMD_DAL)
+- if (amdgpu_dal && amdgpu_device_has_dal_support(adev)) {
++ if (amdgpu_device_has_dal_support(adev)) {
+ adev->ip_blocks = fiji_ip_blocks_dal;
+ adev->num_ip_blocks = ARRAY_SIZE(fiji_ip_blocks_dal);
+ } else {
+@@ -1227,7 +1373,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
+ break;
+ case CHIP_TONGA:
+ #if defined(CONFIG_DRM_AMD_DAL)
+- if (amdgpu_dal && amdgpu_device_has_dal_support(adev)) {
++ if (amdgpu_device_has_dal_support(adev)) {
+ adev->ip_blocks = tonga_ip_blocks_dal;
+ adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks_dal);
+ } else {
+@@ -1239,10 +1385,25 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
+ adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks);
+ #endif
+ break;
++ case CHIP_POLARIS11:
++ case CHIP_POLARIS10:
++#if defined(CONFIG_DRM_AMD_DAL)
++ if (amdgpu_device_has_dal_support(adev)) {
++ adev->ip_blocks = polaris11_ip_blocks_dal;
++ adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks_dal);
++ } else {
++ adev->ip_blocks = polaris11_ip_blocks;
++ adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks);
++ }
++#else
++ adev->ip_blocks = polaris11_ip_blocks;
++ adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks);
++#endif
++ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ #if defined(CONFIG_DRM_AMD_DAL)
+- if (amdgpu_dal && amdgpu_device_has_dal_support(adev)) {
++ if (amdgpu_device_has_dal_support(adev)) {
+ adev->ip_blocks = cz_ip_blocks_dal;
+ adev->num_ip_blocks = ARRAY_SIZE(cz_ip_blocks_dal);
+ } else {
+@@ -1286,7 +1447,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
+ .get_xclk = &vi_get_xclk,
+ .set_uvd_clocks = &vi_set_uvd_clocks,
+ .set_vce_clocks = &vi_set_vce_clocks,
+- .get_cu_info = &gfx_v8_0_get_cu_info,
+ /* these should be moved to their own ip modules */
+ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
+ .wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle,
+@@ -1326,18 +1486,76 @@ static int vi_common_early_init(void *handle)
+ adev->external_rev_id = 0x1;
+ break;
+ case CHIP_FIJI:
+- adev->cg_flags = 0;
++ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
++ AMD_CG_SUPPORT_GFX_MGLS |
++ AMD_CG_SUPPORT_GFX_RLC_LS |
++ AMD_CG_SUPPORT_GFX_CP_LS |
++ AMD_CG_SUPPORT_GFX_CGTS |
++ AMD_CG_SUPPORT_GFX_CGTS_LS |
++ AMD_CG_SUPPORT_GFX_CGCG |
++ AMD_CG_SUPPORT_GFX_CGLS |
++ AMD_CG_SUPPORT_SDMA_MGCG |
++ AMD_CG_SUPPORT_SDMA_LS |
++ AMD_CG_SUPPORT_BIF_LS |
++ AMD_CG_SUPPORT_HDP_MGCG |
++ AMD_CG_SUPPORT_HDP_LS |
++ AMD_CG_SUPPORT_ROM_MGCG |
++ AMD_CG_SUPPORT_MC_MGCG |
++ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x3c;
+ break;
+ case CHIP_TONGA:
+- adev->cg_flags = 0;
++ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x14;
+ break;
++ case CHIP_POLARIS11:
++ adev->cg_flags = 0;
++ adev->pg_flags = 0;
++ adev->external_rev_id = adev->rev_id + 0x5A;
++ break;
++ case CHIP_POLARIS10:
++ adev->cg_flags = 0;
++ adev->pg_flags = 0;
++ adev->external_rev_id = adev->rev_id + 0x50;
++ break;
+ case CHIP_CARRIZO:
++ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
++ AMD_CG_SUPPORT_GFX_MGCG |
++ AMD_CG_SUPPORT_GFX_MGLS |
++ AMD_CG_SUPPORT_GFX_RLC_LS |
++ AMD_CG_SUPPORT_GFX_CP_LS |
++ AMD_CG_SUPPORT_GFX_CGTS |
++ AMD_CG_SUPPORT_GFX_MGLS |
++ AMD_CG_SUPPORT_GFX_CGTS_LS |
++ AMD_CG_SUPPORT_GFX_CGCG |
++ AMD_CG_SUPPORT_GFX_CGLS |
++ AMD_CG_SUPPORT_BIF_LS |
++ AMD_CG_SUPPORT_HDP_MGCG |
++ AMD_CG_SUPPORT_HDP_LS |
++ AMD_CG_SUPPORT_SDMA_MGCG |
++ AMD_CG_SUPPORT_SDMA_LS;
++ /* rev0 hardware doesn't support PG */
++ adev->pg_flags = 0;
++ if (adev->rev_id != 0x00)
++ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
++ AMD_PG_SUPPORT_GFX_SMG |
++ AMD_PG_SUPPORT_GFX_DMG |
++ AMD_PG_SUPPORT_CP |
++ AMD_PG_SUPPORT_RLC_SMU_HS |
++ AMD_PG_SUPPORT_GFX_PIPELINE;
++ adev->external_rev_id = adev->rev_id + 0x1;
++ break;
+ case CHIP_STONEY:
+- adev->cg_flags = 0;
++ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
++ AMD_CG_SUPPORT_GFX_MGCG |
++ AMD_CG_SUPPORT_GFX_MGLS |
++ AMD_CG_SUPPORT_BIF_LS |
++ AMD_CG_SUPPORT_HDP_MGCG |
++ AMD_CG_SUPPORT_HDP_LS |
++ AMD_CG_SUPPORT_SDMA_MGCG |
++ AMD_CG_SUPPORT_SDMA_LS;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+@@ -1414,24 +1632,19 @@ static int vi_common_wait_for_idle(void *handle)
+ return 0;
+ }
+
+-static void vi_common_print_status(void *handle)
+-{
+- return;
+-}
+-
+ static int vi_common_soft_reset(void *handle)
+ {
+ return 0;
+ }
+
+-static void fiji_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
+- bool enable)
++static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
++ bool enable)
+ {
+ uint32_t temp, data;
+
+ temp = data = RREG32_PCIE(ixPCIE_CNTL2);
+
+- if (enable)
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
+@@ -1444,14 +1657,14 @@ static void fiji_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
+ WREG32_PCIE(ixPCIE_CNTL2, data);
+ }
+
+-static void fiji_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev,
+- bool enable)
++static void vi_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev,
++ bool enable)
+ {
+ uint32_t temp, data;
+
+ temp = data = RREG32(mmHDP_HOST_PATH_CNTL);
+
+- if (enable)
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ data &= ~HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK;
+ else
+ data |= HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK;
+@@ -1460,14 +1673,14 @@ static void fiji_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev
+ WREG32(mmHDP_HOST_PATH_CNTL, data);
+ }
+
+-static void fiji_update_hdp_light_sleep(struct amdgpu_device *adev,
+- bool enable)
++static void vi_update_hdp_light_sleep(struct amdgpu_device *adev,
++ bool enable)
+ {
+ uint32_t temp, data;
+
+ temp = data = RREG32(mmHDP_MEM_POWER_LS);
+
+- if (enable)
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+ else
+ data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+@@ -1476,14 +1689,14 @@ static void fiji_update_hdp_light_sleep(struct amdgpu_device *adev,
+ WREG32(mmHDP_MEM_POWER_LS, data);
+ }
+
+-static void fiji_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev,
+- bool enable)
++static void vi_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev,
++ bool enable)
+ {
+ uint32_t temp, data;
+
+ temp = data = RREG32_SMC(ixCGTT_ROM_CLK_CTRL0);
+
+- if (enable)
++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+@@ -1495,19 +1708,28 @@ static void fiji_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev
+ }
+
+ static int vi_common_set_clockgating_state(void *handle,
+- enum amd_clockgating_state state)
++ enum amd_clockgating_state state)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+- fiji_update_bif_medium_grain_light_sleep(adev,
++ vi_update_bif_medium_grain_light_sleep(adev,
++ state == AMD_CG_STATE_GATE ? true : false);
++ vi_update_hdp_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+- fiji_update_hdp_medium_grain_clock_gating(adev,
++ vi_update_hdp_light_sleep(adev,
++ state == AMD_CG_STATE_GATE ? true : false);
++ vi_update_rom_medium_grain_clock_gating(adev,
++ state == AMD_CG_STATE_GATE ? true : false);
++ break;
++ case CHIP_CARRIZO:
++ case CHIP_STONEY:
++ vi_update_bif_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+- fiji_update_hdp_light_sleep(adev,
++ vi_update_hdp_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+- fiji_update_rom_medium_grain_clock_gating(adev,
++ vi_update_hdp_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+@@ -1523,6 +1745,7 @@ static int vi_common_set_powergating_state(void *handle,
+ }
+
+ const struct amd_ip_funcs vi_common_ip_funcs = {
++ .name = "vi_common",
+ .early_init = vi_common_early_init,
+ .late_init = NULL,
+ .sw_init = vi_common_sw_init,
+@@ -1534,7 +1757,6 @@ const struct amd_ip_funcs vi_common_ip_funcs = {
+ .is_idle = vi_common_is_idle,
+ .wait_for_idle = vi_common_wait_for_idle,
+ .soft_reset = vi_common_soft_reset,
+- .print_status = vi_common_print_status,
+ .set_clockgating_state = vi_common_set_clockgating_state,
+ .set_powergating_state = vi_common_set_powergating_state,
+ };
+diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
+index 04e4090..45fde50 100644
+--- a/drivers/gpu/drm/amd/include/amd_shared.h
++++ b/drivers/gpu/drm/amd/include/amd_shared.h
+@@ -48,6 +48,8 @@ enum amd_asic_type {
+ CHIP_FIJI,
+ CHIP_CARRIZO,
+ CHIP_STONEY,
++ CHIP_POLARIS10,
++ CHIP_POLARIS11,
+ CHIP_LAST,
+ };
+
+@@ -104,6 +106,7 @@ enum amd_powergating_state {
+ #define AMD_CG_SUPPORT_VCE_MGCG (1 << 14)
+ #define AMD_CG_SUPPORT_HDP_LS (1 << 15)
+ #define AMD_CG_SUPPORT_HDP_MGCG (1 << 16)
++#define AMD_CG_SUPPORT_ROM_MGCG (1 << 17)
+
+ /* PG flags */
+ #define AMD_PG_SUPPORT_GFX_PG (1 << 0)
+@@ -117,6 +120,8 @@ enum amd_powergating_state {
+ #define AMD_PG_SUPPORT_SDMA (1 << 8)
+ #define AMD_PG_SUPPORT_ACP (1 << 9)
+ #define AMD_PG_SUPPORT_SAMU (1 << 10)
++#define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11)
++#define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12)
+
+ enum amd_pm_state_type {
+ /* not used for dpm */
+@@ -140,6 +145,8 @@ enum amd_pm_state_type {
+ };
+
+ struct amd_ip_funcs {
++ /* Name of IP block */
++ char *name;
+ /* sets up early driver state (pre sw_init), does not configure hw - Optional */
+ int (*early_init)(void *handle);
+ /* sets up late driver/hw state (post hw_init) - Optional */
+diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
+index a9b6923..ebaf67b 100644
+--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
++++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
+@@ -1391,6 +1391,8 @@
+ #define mmRLC_CGTT_MGCG_OVERRIDE 0xec48
+ #define mmRLC_CGCG_CGLS_CTRL 0xec49
+ #define mmRLC_CGCG_RAMP_CTRL 0xec4a
++#define mmRLC_CGCG_CGLS_CTRL_3D 0xec9d
++#define mmRLC_CGCG_RAMP_CTRL_3D 0xec9e
+ #define mmRLC_DYN_PG_STATUS 0xec4b
+ #define mmRLC_DYN_PG_REQUEST 0xec4c
+ #define mmRLC_PG_DELAY 0xec4d
+diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
+index eaf451e..32f3e34 100644
+--- a/drivers/gpu/drm/amd/include/atombios.h
++++ b/drivers/gpu/drm/amd/include/atombios.h
+@@ -79,9 +79,23 @@
+ #define ATOM_PPLL0 2
+ #define ATOM_PPLL3 3
+
++#define ATOM_PHY_PLL0 4
++#define ATOM_PHY_PLL1 5
++
+ #define ATOM_EXT_PLL1 8
++#define ATOM_GCK_DFS 8
+ #define ATOM_EXT_PLL2 9
++#define ATOM_FCH_CLK 9
+ #define ATOM_EXT_CLOCK 10
++#define ATOM_DP_DTO 11
++
++#define ATOM_COMBOPHY_PLL0 20
++#define ATOM_COMBOPHY_PLL1 21
++#define ATOM_COMBOPHY_PLL2 22
++#define ATOM_COMBOPHY_PLL3 23
++#define ATOM_COMBOPHY_PLL4 24
++#define ATOM_COMBOPHY_PLL5 25
++
+ #define ATOM_PPLL_INVALID 0xFF
+
+ #define ENCODER_REFCLK_SRC_P1PLL 0
+@@ -224,6 +238,31 @@ typedef struct _ATOM_ROM_HEADER
+ UCHAR ucReserved;
+ }ATOM_ROM_HEADER;
+
++
++typedef struct _ATOM_ROM_HEADER_V2_1
++{
++ ATOM_COMMON_TABLE_HEADER sHeader;
++ UCHAR uaFirmWareSignature[4]; //Signature to distinguish between Atombios and non-atombios,
++ //atombios should init it as "ATOM", don't change the position
++ USHORT usBiosRuntimeSegmentAddress;
++ USHORT usProtectedModeInfoOffset;
++ USHORT usConfigFilenameOffset;
++ USHORT usCRC_BlockOffset;
++ USHORT usBIOS_BootupMessageOffset;
++ USHORT usInt10Offset;
++ USHORT usPciBusDevInitCode;
++ USHORT usIoBaseAddress;
++ USHORT usSubsystemVendorID;
++ USHORT usSubsystemID;
++ USHORT usPCI_InfoOffset;
++ USHORT usMasterCommandTableOffset;//Offset for SW to get all command table offsets, Don't change the position
++ USHORT usMasterDataTableOffset; //Offset for SW to get all data table offsets, Don't change the position
++ UCHAR ucExtendedFunctionCode;
++ UCHAR ucReserved;
++ ULONG ulPSPDirTableOffset;
++}ATOM_ROM_HEADER_V2_1;
++
++
+ //==============================Command Table Portion====================================
+
+
+@@ -272,12 +311,12 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{
+ USHORT GetSCLKOverMCLKRatio; //Atomic Table, only used by Bios
+ USHORT SetCRTC_Timing; //Atomic Table, directly used by various SW components,latest version 1.1
+ USHORT SetCRTC_OverScan; //Atomic Table, used by various SW components,latest version 1.1
+- USHORT SetCRTC_Replication; //Atomic Table, used only by Bios
++ USHORT GetSMUClockInfo; //Atomic Table, used only by Bios
+ USHORT SelectCRTC_Source; //Atomic Table, directly used by various SW components,latest version 1.1
+ USHORT EnableGraphSurfaces; //Atomic Table, used only by Bios
+ USHORT UpdateCRTC_DoubleBufferRegisters; //Atomic Table, used only by Bios
+ USHORT LUT_AutoFill; //Atomic Table, only used by Bios
+- USHORT EnableHW_IconCursor; //Atomic Table, only used by Bios
++ USHORT SetDCEClock; //Atomic Table, start from DCE11.1, shared by driver and VBIOS, change DISPCLK and DPREFCLK
+ USHORT GetMemoryClock; //Atomic Table, directly used by various SW components,latest version 1.1
+ USHORT GetEngineClock; //Atomic Table, directly used by various SW components,latest version 1.1
+ USHORT SetCRTC_UsingDTDTiming; //Atomic Table, directly used by various SW components,latest version 1.1
+@@ -292,7 +331,7 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{
+ USHORT PowerConnectorDetection; //Atomic Table, directly used by various SW components,latest version 1.1
+ USHORT MC_Synchronization; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock
+ USHORT ComputeMemoryEnginePLL; //Atomic Table, indirectly used by various SW components,called from SetMemory/EngineClock
+- USHORT MemoryRefreshConversion; //Atomic Table, indirectly used by various SW components,called from SetMemory or SetEngineClock
++ USHORT Gfx_Init; //Atomic Table, indirectly used by various SW components,called from SetMemory or SetEngineClock
+ USHORT VRAM_GetCurrentInfoBlock; //Atomic Table, used only by Bios
+ USHORT DynamicMemorySettings; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock
+ USHORT MemoryTraining; //Atomic Table, used only by Bios
+@@ -333,6 +372,10 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{
+ #define LCD1OutputControl HW_Misc_Operation
+ #define TV1OutputControl Gfx_Harvesting
+ #define TVEncoderControl SMC_Init
++#define EnableHW_IconCursor SetDCEClock
++#define SetCRTC_Replication GetSMUClockInfo
++
++#define MemoryRefreshConversion Gfx_Init
+
+ typedef struct _ATOM_MASTER_COMMAND_TABLE
+ {
+@@ -425,6 +468,9 @@ typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2
+ #define b3FIRST_TIME_CHANGE_CLOCK 0x08 //Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup
+ #define b3SKIP_SW_PROGRAM_PLL 0x10 //Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL
+ #define b3DRAM_SELF_REFRESH_EXIT 0x20 //Applicable to DRAM self refresh exit only. when set, it means it will go to program DRAM self refresh exit path
++#define b3SRIOV_INIT_BOOT 0x40 //Use by HV GPU driver only, to load uCode. for ASIC_InitTable SCLK parameter only
++#define b3SRIOV_LOAD_UCODE 0x40 //Use by HV GPU driver only, to load uCode. for ASIC_InitTable SCLK parameter only
++#define b3SRIOV_SKIP_ASIC_INIT 0x02 //Use by HV GPU driver only, skip ASIC_Init for primary adapter boot. for ASIC_InitTable SCLK parameter only
+
+ typedef struct _ATOM_COMPUTE_CLOCK_FREQ
+ {
+@@ -518,6 +564,33 @@ typedef struct _COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6
+ //ucPllCntlFlag
+ #define SPLL_CNTL_FLAG_VCO_MODE_MASK 0x03
+
++typedef struct _COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_7
++{
++ ATOM_COMPUTE_CLOCK_FREQ ulClock; //Input Parameter
++ ULONG ulReserved[5];
++}COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_7;
++
++//ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag
++#define COMPUTE_GPUCLK_INPUT_FLAG_CLK_TYPE_MASK 0x0f
++#define COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK 0x00
++#define COMPUTE_GPUCLK_INPUT_FLAG_SCLK 0x01
++
++typedef struct _COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7
++{
++ COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 ulClock; //Output Parameter: ucPostDiv=DFS divider
++ USHORT usSclk_fcw_frac; //fractional divider of fcw = usSclk_fcw_frac/65536
++ USHORT usSclk_fcw_int; //integer divider of fcwc
++ UCHAR ucSclkPostDiv; //PLL post divider = 2^ucSclkPostDiv
++ UCHAR ucSclkVcoMode; //0: 4G~8Ghz, 1:3G~6Ghz,3: 2G~4Ghz, 2:Reserved
++ UCHAR ucSclkPllRange; //GreenTable SCLK PLL range entry index ( 0~7 )
++ UCHAR ucSscEnable;
++ USHORT usSsc_fcw1_frac; //fcw1_frac when SSC enable
++ USHORT usSsc_fcw1_int; //fcw1_int when SSC enable
++ USHORT usReserved;
++ USHORT usPcc_fcw_int;
++ USHORT usSsc_fcw_slew_frac; //fcw_slew_frac when SSC enable
++ USHORT usPcc_fcw_slew_frac;
++}COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7;
+
+ // ucInputFlag
+ #define ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN 1 // 1-StrobeMode, 0-PerformanceMode
+@@ -557,12 +630,16 @@ typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2
+ ULONG ulReserved;
+ }COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2;
+
++//Input parameter of DynamicMemorySettingsTable
++//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag = COMPUTE_MEMORY_PLL_PARAM
+ typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER
+ {
+ ATOM_COMPUTE_CLOCK_FREQ ulClock;
+ ULONG ulReserved[2];
+ }DYNAMICE_MEMORY_SETTINGS_PARAMETER;
+
++//Input parameter of DynamicMemorySettingsTable
++//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag == COMPUTE_ENGINE_PLL_PARAM
+ typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER
+ {
+ ATOM_COMPUTE_CLOCK_FREQ ulClock;
+@@ -570,6 +647,29 @@ typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER
+ ULONG ulReserved;
+ }DYNAMICE_ENGINE_SETTINGS_PARAMETER;
+
++//Input parameter of DynamicMemorySettingsTable ver2.1 and above
++//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag == ADJUST_MC_SETTING_PARAM
++typedef struct _DYNAMICE_MC_DPM_SETTINGS_PARAMETER
++{
++ ATOM_COMPUTE_CLOCK_FREQ ulClock;
++ UCHAR ucMclkDPMState;
++ UCHAR ucReserved[3];
++ ULONG ulReserved;
++}DYNAMICE_MC_DPM_SETTINGS_PARAMETER;
++
++//ucMclkDPMState
++#define DYNAMIC_MC_DPM_SETTING_LOW_DPM_STATE 0
++#define DYNAMIC_MC_DPM_SETTING_MEDIUM_DPM_STATE 1
++#define DYNAMIC_MC_DPM_SETTING_HIGH_DPM_STATE 2
++
++typedef union _DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1
++{
++ DYNAMICE_MEMORY_SETTINGS_PARAMETER asMCReg;
++ DYNAMICE_ENGINE_SETTINGS_PARAMETER asMCArbReg;
++ DYNAMICE_MC_DPM_SETTINGS_PARAMETER asDPMMCReg;
++}DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1;
++
++
+ /****************************************************************************/
+ // Structures used by SetEngineClockTable
+ /****************************************************************************/
+@@ -584,6 +684,13 @@ typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION
+ COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
+ }SET_ENGINE_CLOCK_PS_ALLOCATION;
+
++typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION_V1_2
++{
++ ULONG ulTargetEngineClock; //In 10Khz unit
++ COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_7 sReserved;
++}SET_ENGINE_CLOCK_PS_ALLOCATION_V1_2;
++
++
+ /****************************************************************************/
+ // Structures used by SetMemoryClockTable
+ /****************************************************************************/
+@@ -827,6 +934,12 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V2
+ #define ATOM_ENCODER_CMD_SETUP 0x0f
+ #define ATOM_ENCODER_CMD_SETUP_PANEL_MODE 0x10
+
++// New Command for DIGxEncoderControlTable v1.5
++#define ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4 0x14
++#define ATOM_ENCODER_CMD_STREAM_SETUP 0x0F //change name ATOM_ENCODER_CMD_SETUP
++#define ATOM_ENCODER_CMD_LINK_SETUP 0x11 //internal use, called by other Command Table
++#define ATOM_ENCODER_CMD_ENCODER_BLANK 0x12 //internal use, called by other Command Table
++
+ // ucStatus
+ #define ATOM_ENCODER_STATUS_LINK_TRAINING_COMPLETE 0x10
+ #define ATOM_ENCODER_STATUS_LINK_TRAINING_INCOMPLETE 0x00
+@@ -955,6 +1068,69 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4
+ #define DP_PANEL_MODE_INTERNAL_DP2_MODE 0x01
+ #define DP_PANEL_MODE_INTERNAL_DP1_MODE 0x11
+
++
++typedef struct _ENCODER_STREAM_SETUP_PARAMETERS_V5
++{
++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6
++ UCHAR ucAction; // = ATOM_ENCODER_CMD_STREAM_SETUP
++ UCHAR ucDigMode; // ATOM_ENCODER_MODE_DP/ATOM_ENCODER_MODE_DVI/ATOM_ENCODER_MODE_HDMI
++ UCHAR ucLaneNum; // Lane number
++ ULONG ulPixelClock; // Pixel Clock in 10Khz
++ UCHAR ucBitPerColor;
++ UCHAR ucLinkRateIn270Mhz;//= DP link rate/270Mhz, =6: 1.62G = 10: 2.7G, =20: 5.4Ghz, =30: 8.1Ghz etc
++ UCHAR ucReserved[2];
++}ENCODER_STREAM_SETUP_PARAMETERS_V5;
++
++typedef struct _ENCODER_LINK_SETUP_PARAMETERS_V5
++{
++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6
++ UCHAR ucAction; // = ATOM_ENCODER_CMD_LINK_SETUP
++ UCHAR ucDigMode; // ATOM_ENCODER_MODE_DP/ATOM_ENCODER_MODE_DVI/ATOM_ENCODER_MODE_HDMI
++ UCHAR ucLaneNum; // Lane number
++ ULONG ulSymClock; // Symbol Clock in 10Khz
++ UCHAR ucHPDSel;
++ UCHAR ucDigEncoderSel; // DIG stream( front-end ) selection, bit0 means DIG0 FE is enable,
++ UCHAR ucReserved[2];
++}ENCODER_LINK_SETUP_PARAMETERS_V5;
++
++typedef struct _DP_PANEL_MODE_SETUP_PARAMETERS_V5
++{
++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6
++ UCHAR ucAction; // = ATOM_ENCODER_CMD_DPLINK_SETUP
++ UCHAR ucPanelMode; // =0: external DP
++ // =0x1: internal DP2
++ // =0x11: internal DP1 NutMeg/Travis DP Translator
++ UCHAR ucReserved;
++ ULONG ulReserved[2];
++}DP_PANEL_MODE_SETUP_PARAMETERS_V5;
++
++typedef struct _ENCODER_GENERIC_CMD_PARAMETERS_V5
++{
++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6
++ UCHAR ucAction; // = rest of generic encoder command which does not carry any parameters
++ UCHAR ucReserved[2];
++ ULONG ulReserved[2];
++}ENCODER_GENERIC_CMD_PARAMETERS_V5;
++
++//ucDigId
++#define ATOM_ENCODER_CONFIG_V5_DIG0_ENCODER 0x00
++#define ATOM_ENCODER_CONFIG_V5_DIG1_ENCODER 0x01
++#define ATOM_ENCODER_CONFIG_V5_DIG2_ENCODER 0x02
++#define ATOM_ENCODER_CONFIG_V5_DIG3_ENCODER 0x03
++#define ATOM_ENCODER_CONFIG_V5_DIG4_ENCODER 0x04
++#define ATOM_ENCODER_CONFIG_V5_DIG5_ENCODER 0x05
++#define ATOM_ENCODER_CONFIG_V5_DIG6_ENCODER 0x06
++
++
++typedef union _DIG_ENCODER_CONTROL_PARAMETERS_V5
++{
++ ENCODER_GENERIC_CMD_PARAMETERS_V5 asCmdParam;
++ ENCODER_STREAM_SETUP_PARAMETERS_V5 asStreamParam;
++ ENCODER_LINK_SETUP_PARAMETERS_V5 asLinkParam;
++ DP_PANEL_MODE_SETUP_PARAMETERS_V5 asDPPanelModeParam;
++}DIG_ENCODER_CONTROL_PARAMETERS_V5;
++
++
+ /****************************************************************************/
+ // Structures used by UNIPHYTransmitterControlTable
+ // LVTMATransmitterControlTable
+@@ -1371,6 +1547,49 @@ typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5
+
+ #define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION_V1_5 DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5
+
++typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6
++{
++ UCHAR ucPhyId; // 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4= UNIPHYE 5=UNIPHYF
++ UCHAR ucAction; // define as ATOM_TRANSMITER_ACTION_xxx
++ union
++ {
++ UCHAR ucDigMode; // ATOM_ENCODER_MODE_DP/ATOM_ENCODER_MODE_DVI/ATOM_ENCODER_MODE_HDMI
++ UCHAR ucDPLaneSet; // DP voltage swing and pre-emphasis value defined in DPCD DP_LANE_SET, "DP_LANE_SET__xDB_y_zV"
++ };
++ UCHAR ucLaneNum; // Lane number
++ ULONG ulSymClock; // Symbol Clock in 10Khz
++ UCHAR ucHPDSel; // =1: HPD1, =2: HPD2, .... =6: HPD6, =0: HPD is not assigned
++ UCHAR ucDigEncoderSel; // DIG stream( front-end ) selection, bit0 means DIG0 FE is enable,
++ UCHAR ucConnObjId; // Connector Object Id defined in ObjectId.h
++ UCHAR ucReserved;
++ ULONG ulReserved;
++}DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6;
++
++
++// ucDigEncoderSel
++#define ATOM_TRANMSITTER_V6__DIGA_SEL 0x01
++#define ATOM_TRANMSITTER_V6__DIGB_SEL 0x02
++#define ATOM_TRANMSITTER_V6__DIGC_SEL 0x04
++#define ATOM_TRANMSITTER_V6__DIGD_SEL 0x08
++#define ATOM_TRANMSITTER_V6__DIGE_SEL 0x10
++#define ATOM_TRANMSITTER_V6__DIGF_SEL 0x20
++#define ATOM_TRANMSITTER_V6__DIGG_SEL 0x40
++
++// ucDigMode
++#define ATOM_TRANSMITTER_DIGMODE_V6_DP 0
++#define ATOM_TRANSMITTER_DIGMODE_V6_DVI 2
++#define ATOM_TRANSMITTER_DIGMODE_V6_HDMI 3
++#define ATOM_TRANSMITTER_DIGMODE_V6_DP_MST 5
++
++//ucHPDSel
++#define ATOM_TRANSMITTER_V6_NO_HPD_SEL 0x00
++#define ATOM_TRANSMITTER_V6_HPD1_SEL 0x01
++#define ATOM_TRANSMITTER_V6_HPD2_SEL 0x02
++#define ATOM_TRANSMITTER_V6_HPD3_SEL 0x03
++#define ATOM_TRANSMITTER_V6_HPD4_SEL 0x04
++#define ATOM_TRANSMITTER_V6_HPD5_SEL 0x05
++#define ATOM_TRANSMITTER_V6_HPD6_SEL 0x06
++
+
+ /****************************************************************************/
+ // Structures used by ExternalEncoderControlTable V1.3
+@@ -1784,6 +2003,101 @@ typedef struct _GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V3
+ PIXEL_CLOCK_PARAMETERS_V5 sDispClkInput;
+ }GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V3;
+
++typedef struct _PIXEL_CLOCK_PARAMETERS_V7
++{
++ ULONG ulPixelClock; // target the pixel clock to drive the CRTC timing in unit of 100Hz.
++
++ UCHAR ucPpll; // ATOM_PHY_PLL0/ATOM_PHY_PLL1/ATOM_PPLL0
++ UCHAR ucTransmitterID; // ASIC encoder id defined in objectId.h,
++ // indicate which graphic encoder will be used.
++ UCHAR ucEncoderMode; // Encoder mode:
++ UCHAR ucMiscInfo; // bit[0]= Force program PLL for pixclk
++ // bit[1]= Force program PHY PLL only ( internally used by VBIOS only in DP case which PHYPLL is programmed for SYMCLK, not Pixclk )
++ // bit[5:4]= RefClock source for PPLL.
++ // =0: XTALIN( default mode )
++ // =1: pcie
++ // =2: GENLK
++ UCHAR ucCRTC; // ATOM_CRTC1~6, indicate the CRTC controller to
++ UCHAR ucDeepColorRatio; // HDMI panel bit depth: =0: 24bpp =1:30bpp, =2:36bpp
++ UCHAR ucReserved[2];
++ ULONG ulReserved;
++}PIXEL_CLOCK_PARAMETERS_V7;
++
++//ucMiscInfo
++#define PIXEL_CLOCK_V7_MISC_FORCE_PROG_PPLL 0x01
++#define PIXEL_CLOCK_V7_MISC_PROG_PHYPLL 0x02
++#define PIXEL_CLOCK_V7_MISC_YUV420_MODE 0x04
++#define PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN 0x08
++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC 0x30
++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC_XTALIN 0x00
++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC_PCIE 0x10
++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC_GENLK 0x20
++
++//ucDeepColorRatio
++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS 0x00 //00 - DCCG_DEEP_COLOR_DTO_DISABLE: Disable Deep Color DTO
++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4 0x01 //01 - DCCG_DEEP_COLOR_DTO_5_4_RATIO: Set Deep Color DTO to 5:4
++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2 0x02 //02 - DCCG_DEEP_COLOR_DTO_3_2_RATIO: Set Deep Color DTO to 3:2
++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1 0x03 //03 - DCCG_DEEP_COLOR_DTO_2_1_RATIO: Set Deep Color DTO to 2:1
++
++// SetDCEClockTable input parameter for DCE11.1
++typedef struct _SET_DCE_CLOCK_PARAMETERS_V1_1
++{
++ ULONG ulDISPClkFreq; // target DISPCLK frequency in unit of 10kHz, return real DISPCLK frequency. when ucFlag[1]=1, in unit of 100Hz.
++ UCHAR ucFlag; // bit0=1: DPREFCLK bypass DFS bit0=0: DPREFCLK not bypass DFS
++ UCHAR ucCrtc; // use when enable DCCG pixel clock ucFlag[1]=1
++ UCHAR ucPpllId; // use when enable DCCG pixel clock ucFlag[1]=1
++ UCHAR ucDeepColorRatio; // use when enable DCCG pixel clock ucFlag[1]=1
++}SET_DCE_CLOCK_PARAMETERS_V1_1;
++
++
++typedef struct _SET_DCE_CLOCK_PS_ALLOCATION_V1_1
++{
++ SET_DCE_CLOCK_PARAMETERS_V1_1 asParam;
++ ULONG ulReserved[2];
++}SET_DCE_CLOCK_PS_ALLOCATION_V1_1;
++
++//SET_DCE_CLOCK_PARAMETERS_V1_1.ucFlag
++#define SET_DCE_CLOCK_FLAG_GEN_DPREFCLK 0x01
++#define SET_DCE_CLOCK_FLAG_DPREFCLK_BYPASS 0x01
++#define SET_DCE_CLOCK_FLAG_ENABLE_PIXCLK 0x02
++
++// SetDCEClockTable input parameter for DCE11.2( POLARIS10 and POLARIS11 ) and above
++typedef struct _SET_DCE_CLOCK_PARAMETERS_V2_1
++{
++ ULONG ulDCEClkFreq; // target DCE frequency in unit of 10KHZ, return real DISPCLK/DPREFCLK frequency.
++ UCHAR ucDCEClkType; // =0: DISPCLK =1: DPREFCLK =2: PIXCLK
++ UCHAR ucDCEClkSrc; // ATOM_PLL0 or ATOM_GCK_DFS or ATOM_FCH_CLK or ATOM_COMBOPHY_PLLx
++ UCHAR ucDCEClkFlag; // Bit [1:0] = PPLL ref clock source ( when ucDCEClkSrc= ATOM_PPLL0 )
++ UCHAR ucCRTC; // ucDisp Pipe Id, ATOM_CRTC0/1/2/..., use only when ucDCEClkType = PIXCLK
++}SET_DCE_CLOCK_PARAMETERS_V2_1;
++
++//ucDCEClkType
++#define DCE_CLOCK_TYPE_DISPCLK 0
++#define DCE_CLOCK_TYPE_DPREFCLK 1
++#define DCE_CLOCK_TYPE_PIXELCLK 2 // used by VBIOS internally, called by SetPixelClockTable
++
++//ucDCEClkFlag when ucDCEClkType == DPREFCLK
++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_MASK 0x03
++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_GENERICA 0x00
++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_GENLK 0x01
++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_PCIE 0x02
++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_XTALIN 0x03
++
++//ucDCEClkFlag when ucDCEClkType == PIXCLK
++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_MASK 0x03
++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_DIS 0x00 //00 - DCCG_DEEP_COLOR_DTO_DISABLE: Disable Deep Color DTO
++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_5_4 0x01 //01 - DCCG_DEEP_COLOR_DTO_5_4_RATIO: Set Deep Color DTO to 5:4
++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_3_2 0x02 //02 - DCCG_DEEP_COLOR_DTO_3_2_RATIO: Set Deep Color DTO to 3:2
++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_2_1 0x03 //03 - DCCG_DEEP_COLOR_DTO_2_1_RATIO: Set Deep Color DTO to 2:1
++#define DCE_CLOCK_FLAG_PIXCLK_YUV420_MODE 0x04
++
++typedef struct _SET_DCE_CLOCK_PS_ALLOCATION_V2_1
++{
++ SET_DCE_CLOCK_PARAMETERS_V2_1 asParam;
++ ULONG ulReserved[2];
++}SET_DCE_CLOCK_PS_ALLOCATION_V2_1;
++
++
+
+ /****************************************************************************/
+ // Structures used by AdjustDisplayPllTable
+@@ -2300,6 +2614,11 @@ typedef struct _SET_VOLTAGE_PARAMETERS_V1_3
+ #define VOLTAGE_TYPE_VDDCI 4
+ #define VOLTAGE_TYPE_VDDGFX 5
+ #define VOLTAGE_TYPE_PCC 6
++#define VOLTAGE_TYPE_MVPP 7
++#define VOLTAGE_TYPE_LEDDPM 8
++#define VOLTAGE_TYPE_PCC_MVDD 9
++#define VOLTAGE_TYPE_PCIE_VDDC 10
++#define VOLTAGE_TYPE_PCIE_VDDR 11
+
+ #define VOLTAGE_TYPE_GENERIC_I2C_1 0x11
+ #define VOLTAGE_TYPE_GENERIC_I2C_2 0x12
+@@ -2396,6 +2715,39 @@ typedef struct _GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2
+ USHORT usTDP_Power; // TDP_Current in unit of 0.1W
+ }GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2;
+
++
++// New Added from CI Hawaii for GetVoltageInfoTable, input parameter structure
++typedef struct _GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3
++{
++ UCHAR ucVoltageType; // Input: To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI
++ UCHAR ucVoltageMode; // Input: Indicate action: Get voltage info
++ USHORT usVoltageLevel; // Input: real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. ) or Leakage Id
++ ULONG ulSCLKFreq; // Input: when ucVoltageMode= ATOM_GET_VOLTAGE_EVV_VOLTAGE, DPM state SCLK frequency, Define in PPTable SCLK/Voltage dependence table
++ ULONG ulReserved[3];
++}GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3;
++
++// New Added from CI Hawaii for EVV feature
++typedef struct _GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3
++{
++ ULONG ulVoltageLevel; // real voltage level in unit of 0.01mv
++ ULONG ulReserved[4];
++}GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3;
++
++
++/****************************************************************************/
++// Structures used by GetSMUClockInfo
++/****************************************************************************/
++typedef struct _GET_SMU_CLOCK_INFO_INPUT_PARAMETER_V2_1
++{
++ ULONG ulDfsPllOutputFreq:24;
++ ULONG ucDfsDivider:8;
++}GET_SMU_CLOCK_INFO_INPUT_PARAMETER_V2_1;
++
++typedef struct _GET_SMU_CLOCK_INFO_OUTPUT_PARAMETER_V2_1
++{
++ ULONG ulDfsOutputFreq;
++}GET_SMU_CLOCK_INFO_OUTPUT_PARAMETER_V2_1;
++
+ /****************************************************************************/
+ // Structures used by TVEncoderControlTable
+ /****************************************************************************/
+@@ -2429,13 +2781,13 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES
+ USHORT PaletteData; // Only used by BIOS
+ USHORT LCD_Info; // Shared by various SW components,latest version 1.3, was called LVDS_Info
+ USHORT DIGTransmitterInfo; // Internal used by VBIOS only version 3.1
+- USHORT AnalogTV_Info; // Shared by various SW components,latest version 1.1
++ USHORT SMU_Info; // Shared by various SW components,latest version 1.1
+ USHORT SupportedDevicesInfo; // Will be obsolete from R600
+ USHORT GPIO_I2C_Info; // Shared by various SW components,latest version 1.2 will be used from R600
+ USHORT VRAM_UsageByFirmware; // Shared by various SW components,latest version 1.3 will be used from R600
+ USHORT GPIO_Pin_LUT; // Shared by various SW components,latest version 1.1
+ USHORT VESA_ToInternalModeLUT; // Only used by Bios
+- USHORT ComponentVideoInfo; // Shared by various SW components,latest version 2.1 will be used from R600
++ USHORT GFX_Info; // Shared by various SW components,latest version 2.1 will be used from R600
+ USHORT PowerPlayInfo; // Shared by various SW components,latest version 2.1,new design from R600
+ USHORT GPUVirtualizationInfo; // Will be obsolete from R600
+ USHORT SaveRestoreInfo; // Only used by Bios
+@@ -2455,7 +2807,7 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES
+ USHORT ASIC_ProfilingInfo; // New table name from R600, used to be called "ASIC_VDDCI_Info" for pre-R600
+ USHORT VoltageObjectInfo; // Shared by various SW components, latest version 1.1
+ USHORT PowerSourceInfo; // Shared by various SW components, latest versoin 1.1
+- USHORT ServiceInfo;
++ USHORT ServiceInfo;
+ }ATOM_MASTER_LIST_OF_DATA_TABLES;
+
+ typedef struct _ATOM_MASTER_DATA_TABLE
+@@ -2469,6 +2821,8 @@ typedef struct _ATOM_MASTER_DATA_TABLE
+ #define DAC_Info PaletteData
+ #define TMDS_Info DIGTransmitterInfo
+ #define CompassionateData GPUVirtualizationInfo
++#define AnalogTV_Info SMU_Info
++#define ComponentVideoInfo GFX_Info
+
+ /****************************************************************************/
+ // Structure used in MultimediaCapabilityInfoTable
+@@ -4278,10 +4632,15 @@ typedef struct _EXT_DISPLAY_PATH
+ #define MAX_NUMBER_OF_EXT_DISPLAY_PATH 7
+
+ //usCaps
+-#define EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE 0x01
+-#define EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN 0x02
+-#define EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 0x04
+-#define EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT 0x08
++#define EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE 0x0001
++#define EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN 0x0002
++#define EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK 0x007C
++#define EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 (0x01 << 2 ) //PI redriver chip
++#define EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT (0x02 << 2 ) //TI retimer chip
++#define EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 (0x03 << 2 ) //Parade DP->HDMI recoverter chip
++
++
++
+
+ typedef struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO
+ {
+@@ -4325,10 +4684,10 @@ typedef struct _ATOM_COMMON_RECORD_HEADER
+ #define ATOM_CONNECTOR_REMOTE_CAP_RECORD_TYPE 19
+ #define ATOM_ENCODER_CAP_RECORD_TYPE 20
+ #define ATOM_BRACKET_LAYOUT_RECORD_TYPE 21
+-
++#define ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD_TYPE 22
+
+ //Must be updated when new record type is added,equal to that record definition!
+-#define ATOM_MAX_OBJECT_RECORD_NUMBER ATOM_ENCODER_CAP_RECORD_TYPE
++#define ATOM_MAX_OBJECT_RECORD_NUMBER ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD_TYPE
+
+ typedef struct _ATOM_I2C_RECORD
+ {
+@@ -4458,10 +4817,12 @@ typedef struct _ATOM_ENCODER_DVO_CF_RECORD
+ UCHAR ucPadding[2];
+ }ATOM_ENCODER_DVO_CF_RECORD;
+
+-// Bit maps for ATOM_ENCODER_CAP_RECORD.ucEncoderCap
+-#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by HW encoder
++// Bit maps for ATOM_ENCODER_CAP_RECORD.usEncoderCap
++#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by HW encoder, it is retired in NI. the real meaning from SI is MST_EN
++#define ATOM_ENCODER_CAP_RECORD_MST_EN 0x01 // from SI, this bit means DP MST is enable or not.
+ #define ATOM_ENCODER_CAP_RECORD_HBR2_EN 0x02 // DP1.2 HBR2 setting is qualified and HBR2 can be enabled
+ #define ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN 0x04 // HDMI2.0 6Gbps enable or not.
++#define ATOM_ENCODER_CAP_RECORD_HBR3_EN 0x08 // DP1.3 HBR3 is supported by board.
+
+ typedef struct _ATOM_ENCODER_CAP_RECORD
+ {
+@@ -4482,6 +4843,31 @@ typedef struct _ATOM_ENCODER_CAP_RECORD
+ };
+ }ATOM_ENCODER_CAP_RECORD;
+
++// Used after SI
++typedef struct _ATOM_ENCODER_CAP_RECORD_V2
++{
++ ATOM_COMMON_RECORD_HEADER sheader;
++ union {
++ USHORT usEncoderCap;
++ struct {
++#if ATOM_BIG_ENDIAN
++ USHORT usReserved:12; // Bit4-15 may be defined for other capability in future
++ USHORT usHBR3En:1; // bit3 is for DP1.3 HBR3 enable
++ USHORT usHDMI6GEn:1; // Bit2 is for HDMI6Gbps enable, this bit is used starting from CZ( APU) Ellesmere (dGPU)
++ USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable
++ USHORT usMSTEn:1; // Bit0 is for DP1.2 MST enable
++#else
++ USHORT usMSTEn:1; // Bit0 is for DP1.2 MST enable
++ USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable
++ USHORT usHDMI6GEn:1; // Bit2 is for HDMI6Gbps enable, this bit is used starting from CZ( APU) Ellesmere (dGPU)
++ USHORT usHBR3En:1; // bit3 is for DP1.3 HBR3 enable
++ USHORT usReserved:12; // Bit4-15 may be defined for other capability in future
++#endif
++ };
++ };
++}ATOM_ENCODER_CAP_RECORD_V2;
++
++
+ // value for ATOM_CONNECTOR_CF_RECORD.ucConnectedDvoBundle
+ #define ATOM_CONNECTOR_CF_RECORD_CONNECTED_UPPER12BITBUNDLEA 1
+ #define ATOM_CONNECTOR_CF_RECORD_CONNECTED_LOWER12BITBUNDLEB 2
+@@ -4554,6 +4940,16 @@ typedef struct _ATOM_CONNECTOR_REMOTE_CAP_RECORD
+ USHORT usReserved;
+ }ATOM_CONNECTOR_REMOTE_CAP_RECORD;
+
++
++typedef struct _ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD
++{
++ ATOM_COMMON_RECORD_HEADER sheader;
++ // override TMDS capability on this connector when it operate in TMDS mode. usMaxTmdsClkRate = max TMDS Clock in Mhz/2.5
++ UCHAR ucMaxTmdsClkRateIn2_5Mhz;
++ UCHAR ucReserved;
++} ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD;
++
++
+ typedef struct _ATOM_CONNECTOR_LAYOUT_INFO
+ {
+ USHORT usConnectorObjectId;
+@@ -4657,12 +5053,12 @@ typedef struct _ATOM_VOLTAGE_CONTROL
+ #define VOLTAGE_CONTROL_ID_UP1801 0x0C
+ #define VOLTAGE_CONTROL_ID_ST6788A 0x0D
+ #define VOLTAGE_CONTROL_ID_CHLIR3564SVI2 0x0E
+-#define VOLTAGE_CONTROL_ID_AD527x 0x0F
+-#define VOLTAGE_CONTROL_ID_NCP81022 0x10
+-#define VOLTAGE_CONTROL_ID_LTC2635 0x11
+-#define VOLTAGE_CONTROL_ID_NCP4208 0x12
++#define VOLTAGE_CONTROL_ID_AD527x 0x0F
++#define VOLTAGE_CONTROL_ID_NCP81022 0x10
++#define VOLTAGE_CONTROL_ID_LTC2635 0x11
++#define VOLTAGE_CONTROL_ID_NCP4208 0x12
+ #define VOLTAGE_CONTROL_ID_IR35xx 0x13
+-#define VOLTAGE_CONTROL_ID_RT9403 0x14
++#define VOLTAGE_CONTROL_ID_RT9403 0x14
+
+ #define VOLTAGE_CONTROL_ID_GENERIC_I2C 0x40
+
+@@ -4784,11 +5180,38 @@ typedef struct _ATOM_SVID2_VOLTAGE_OBJECT_V3
+ ULONG ulReserved;
+ }ATOM_SVID2_VOLTAGE_OBJECT_V3;
+
++
++
++typedef struct _ATOM_MERGED_VOLTAGE_OBJECT_V3
++{
++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; // voltage mode = VOLTAGE_OBJ_MERGED_POWER
++ UCHAR ucMergedVType; // VDDC/VDCCI/....
++ UCHAR ucReserved[3];
++}ATOM_MERGED_VOLTAGE_OBJECT_V3;
++
++
++typedef struct _ATOM_EVV_DPM_INFO
++{
++ ULONG ulDPMSclk; // DPM state SCLK
++ USHORT usVAdjOffset; // Adjust Voltage offset in unit of mv
++ UCHAR ucDPMTblVIndex; // Voltage Index in SMC_DPM_Table structure VddcTable/VddGfxTable
++ UCHAR ucDPMState; // DPMState0~7
++} ATOM_EVV_DPM_INFO;
++
++// ucVoltageMode = VOLTAGE_OBJ_EVV
++typedef struct _ATOM_EVV_VOLTAGE_OBJECT_V3
++{
++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; // voltage mode = VOLTAGE_OBJ_EVV
++ ATOM_EVV_DPM_INFO asEvvDpmList[8];
++}ATOM_EVV_VOLTAGE_OBJECT_V3;
++
++
+ typedef union _ATOM_VOLTAGE_OBJECT_V3{
+ ATOM_GPIO_VOLTAGE_OBJECT_V3 asGpioVoltageObj;
+ ATOM_I2C_VOLTAGE_OBJECT_V3 asI2cVoltageObj;
+ ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 asLeakageObj;
+ ATOM_SVID2_VOLTAGE_OBJECT_V3 asSVID2Obj;
++ ATOM_EVV_VOLTAGE_OBJECT_V3 asEvvObj;
+ }ATOM_VOLTAGE_OBJECT_V3;
+
+ typedef struct _ATOM_VOLTAGE_OBJECT_INFO_V3_1
+@@ -4963,7 +5386,11 @@ typedef struct _ATOM_ASIC_PROFILING_INFO_V3_3
+ ULONG ulLkgEncodeMax;
+ ULONG ulLkgEncodeMin;
+ ULONG ulEfuseLogisticAlpha;
++
++ union{
+ USHORT usPowerDpm0;
++ USHORT usParamNegFlag; //bit0 =1 :indicate ulRoBeta is Negative, bit1=1 indicate Kv_m max is positive
++ };
+ USHORT usPowerDpm1;
+ USHORT usPowerDpm2;
+ USHORT usPowerDpm3;
+@@ -5067,6 +5494,86 @@ typedef struct _ATOM_ASIC_PROFILING_INFO_V3_4
+ ULONG ulReserved[8]; // Reserved for future ASIC
+ }ATOM_ASIC_PROFILING_INFO_V3_4;
+
++// for Polaris10/Polaris11 speed EVV algorithm
++typedef struct _ATOM_ASIC_PROFILING_INFO_V3_5
++{
++ ATOM_COMMON_TABLE_HEADER asHeader;
++ ULONG ulMaxVddc; //Maximum voltage for all parts, in unit of 0.01mv
++ ULONG ulMinVddc; //Minimum voltage for all parts, in unit of 0.01mv
++ USHORT usLkgEuseIndex; //Efuse Lkg_FT address ( BYTE address )
++ UCHAR ucLkgEfuseBitLSB; //Efuse Lkg_FT bit shift in 32bit DWORD
++ UCHAR ucLkgEfuseLength; //Efuse Lkg_FT length
++ ULONG ulLkgEncodeLn_MaxDivMin; //value of ln(Max_Lkg_Ft/Min_Lkg_Ft ) in unit of 0.00001 ( unit=100000 )
++ ULONG ulLkgEncodeMax; //Maximum Lkg_Ft measured value ( or efuse decode value ), in unit of 0.00001 ( unit=100000 )
++ ULONG ulLkgEncodeMin; //Minimum Lkg_Ft measured value ( or efuse decode value ), in unit of 0.00001 ( unit=100000 )
++ EFUSE_LINEAR_FUNC_PARAM sRoFuse;//Efuse RO info: DWORD address, bit shift, length, max/min measure value. in unit of 1.
++ ULONG ulEvvDefaultVddc; //def="EVV_DEFAULT_VDDC" descr="return default VDDC(v) when Efuse not cut" unit="100000"/>
++ ULONG ulEvvNoCalcVddc; //def="EVV_NOCALC_VDDC" descr="return VDDC(v) when Calculation is bad" unit="100000"/>
++ ULONG ulSpeed_Model; //def="EVV_SPEED_MODEL" descr="0 = Greek model, 1 = multivariate model" unit="1"/>
++ ULONG ulSM_A0; //def="EVV_SM_A0" descr="Leakage coeff(Multivariant Mode)." unit="100000"/>
++ ULONG ulSM_A1; //def="EVV_SM_A1" descr="Leakage/SCLK coeff(Multivariant Mode)." unit="1000000"/>
++ ULONG ulSM_A2; //def="EVV_SM_A2" descr="Alpha( Greek Mode ) or VDDC/SCLK coeff(Multivariant Mode)." unit="100000"/>
++ ULONG ulSM_A3; //def="EVV_SM_A3" descr="Beta( Greek Mode ) or SCLK coeff(Multivariant Mode)." unit="100000"/>
++ ULONG ulSM_A4; //def="EVV_SM_A4" descr="VDDC^2/SCLK coeff(Multivariant Mode)." unit="100000"/>
++ ULONG ulSM_A5; //def="EVV_SM_A5" descr="VDDC^2 coeff(Multivariant Mode)." unit="100000"/>
++ ULONG ulSM_A6; //def="EVV_SM_A6" descr="Gamma( Greek Mode ) or VDDC coeff(Multivariant Mode)." unit="100000"/>
++ ULONG ulSM_A7; //def="EVV_SM_A7" descr="Epsilon( Greek Mode ) or constant(Multivariant Mode)." unit="100000"/>
++ UCHAR ucSM_A0_sign; //def="EVV_SM_A0_SIGN" descr="=0 SM_A0 is positive. =1: SM_A0 is negative" unit="1"/>
++ UCHAR ucSM_A1_sign; //def="EVV_SM_A1_SIGN" descr="=0 SM_A1 is positive. =1: SM_A1 is negative" unit="1"/>
++ UCHAR ucSM_A2_sign; //def="EVV_SM_A2_SIGN" descr="=0 SM_A2 is positive. =1: SM_A2 is negative" unit="1"/>
++ UCHAR ucSM_A3_sign; //def="EVV_SM_A3_SIGN" descr="=0 SM_A3 is positive. =1: SM_A3 is negative" unit="1"/>
++ UCHAR ucSM_A4_sign; //def="EVV_SM_A4_SIGN" descr="=0 SM_A4 is positive. =1: SM_A4 is negative" unit="1"/>
++ UCHAR ucSM_A5_sign; //def="EVV_SM_A5_SIGN" descr="=0 SM_A5 is positive. =1: SM_A5 is negative" unit="1"/>
++ UCHAR ucSM_A6_sign; //def="EVV_SM_A6_SIGN" descr="=0 SM_A6 is positive. =1: SM_A6 is negative" unit="1"/>
++ UCHAR ucSM_A7_sign; //def="EVV_SM_A7_SIGN" descr="=0 SM_A7 is positive. =1: SM_A7 is negative" unit="1"/>
++ ULONG ulMargin_RO_a; //def="EVV_MARGIN_RO_A" descr="A Term to represent RO equation in Ax2+Bx+C, unit=1"
++ ULONG ulMargin_RO_b; //def="EVV_MARGIN_RO_B" descr="B Term to represent RO equation in Ax2+Bx+C, unit=1"
++ ULONG ulMargin_RO_c; //def="EVV_MARGIN_RO_C" descr="C Term to represent RO equation in Ax2+Bx+C, unit=1"
++ ULONG ulMargin_fixed; //def="EVV_MARGIN_FIXED" descr="Fixed MHz to add to SCLK margin, unit=1" unit="1"/>
++ ULONG ulMargin_Fmax_mean; //def="EVV_MARGIN_FMAX_MEAN" descr="Percentage to add for Fmax mean margin unit=10000" unit="10000"/>
++ ULONG ulMargin_plat_mean; //def="EVV_MARGIN_PLAT_MEAN" descr="Percentage to add for platform mean margin unit=10000" unit="10000"/>
++ ULONG ulMargin_Fmax_sigma; //def="EVV_MARGIN_FMAX_SIGMA" descr="Percentage to add for Fmax sigma margin unit=10000" unit="10000"/>
++ ULONG ulMargin_plat_sigma; //def="EVV_MARGIN_PLAT_SIGMA" descr="Percentage to add for platform sigma margin unit=10000" unit="10000"/>
++ ULONG ulMargin_DC_sigma; //def="EVV_MARGIN_DC_SIGMA" descr="Regulator DC tolerance margin (mV) unit=100" unit="100"/>
++ ULONG ulReserved[12];
++}ATOM_ASIC_PROFILING_INFO_V3_5;
++
++
++typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{
++ ULONG ulMaxSclkFreq;
++ UCHAR ucVco_setting; // 1: 3-6GHz, 3: 2-4GHz
++ UCHAR ucPostdiv; // divide by 2^n
++ USHORT ucFcw_pcc;
++ USHORT ucFcw_trans_upper;
++ USHORT ucRcw_trans_lower;
++}ATOM_SCLK_FCW_RANGE_ENTRY_V1;
++
++
++// SMU_InfoTable for Polaris10/Polaris11
++typedef struct _ATOM_SMU_INFO_V2_1
++{
++ ATOM_COMMON_TABLE_HEADER asHeader;
++ UCHAR ucSclkEntryNum; // for potential future extension; indicates the number of ATOM_SCLK_FCW_RANGE_ENTRY_V1 entries
++ UCHAR ucReserved[3];
++ ATOM_SCLK_FCW_RANGE_ENTRY_V1 asSclkFcwRangeEntry[8];
++}ATOM_SMU_INFO_V2_1;
++
++
++// GFX_InfoTable for Polaris10/Polaris11
++typedef struct _ATOM_GFX_INFO_V2_1
++{
++ ATOM_COMMON_TABLE_HEADER asHeader;
++ UCHAR GfxIpMinVer;
++ UCHAR GfxIpMajVer;
++ UCHAR max_shader_engines;
++ UCHAR max_tile_pipes;
++ UCHAR max_cu_per_sh;
++ UCHAR max_sh_per_se;
++ UCHAR max_backends_per_se;
++ UCHAR max_texture_channel_caches;
++}ATOM_GFX_INFO_V2_1;
++
++
+ typedef struct _ATOM_POWER_SOURCE_OBJECT
+ {
+ UCHAR ucPwrSrcId; // Power source
+@@ -5765,14 +6272,6 @@ sExtDispConnInfo: Display connector information table provided t
+
+ **********************************************************************************************************************/
+
+-// this Table is used for Kaveri/Kabini APU
+-typedef struct _ATOM_FUSION_SYSTEM_INFO_V2
+-{
+- ATOM_INTEGRATED_SYSTEM_INFO_V1_8 sIntegratedSysInfo; // refer to ATOM_INTEGRATED_SYSTEM_INFO_V1_8 definition
+- ULONG ulPowerplayTable[128]; // Update comments here to link new powerplay table definition structure
+-}ATOM_FUSION_SYSTEM_INFO_V2;
+-
+-
+ typedef struct _ATOM_I2C_REG_INFO
+ {
+ UCHAR ucI2cRegIndex;
+@@ -5859,7 +6358,50 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_9
+ #define EDP_VS_VARIABLE_PREM_MODE 5
+
+
+-// this IntegrateSystemInfoTable is used for Carrizo
++// ulGPUCapInfo
++#define SYS_INFO_V1_9_GPUCAPSINFO_DISABLE_AUX_MODE_DETECT 0x08
++#define SYS_INFO_V1_9_GPUCAPSINFO_ENABEL_DFS_BYPASS 0x10
++//ulGPUCapInfo[16]=1 indicate SMC firmware is able to support GNB fast resume function, so that driver can call SMC to program most of GNB register during resuming, from ML
++#define SYS_INFO_V1_9_GPUCAPSINFO_GNB_FAST_RESUME_CAPABLE 0x00010000
++//ulGPUCapInfo[18]=1 indicate the IOMMU is not available
++#define SYS_INFO_V1_9_GPUCAPINFO_IOMMU_DISABLE 0x00040000
++//ulGPUCapInfo[19]=1 indicate the MARC Aperture is opened.
++#define SYS_INFO_V1_9_GPUCAPINFO_MARC_APERTURE_ENABLE 0x00080000
++
++
++typedef struct _DPHY_TIMING_PARA
++{
++ UCHAR ucProfileID; // SENSOR_PROFILES
++ ULONG ucPara;
++} DPHY_TIMING_PARA;
++
++typedef struct _DPHY_ELEC_PARA
++{
++ USHORT usPara[3];
++} DPHY_ELEC_PARA;
++
++typedef struct _CAMERA_MODULE_INFO
++{
++ UCHAR ucID; // 0: Rear, 1: Front right of user, 2: Front left of user
++ UCHAR strModuleName[8];
++ DPHY_TIMING_PARA asTimingPara[6]; // Exact number is still being estimated and awaits confirmation from the sensor vendor
++} CAMERA_MODULE_INFO;
++
++typedef struct _FLASHLIGHT_INFO
++{
++ UCHAR ucID; // 0: Rear, 1: Front
++ UCHAR strName[8];
++} FLASHLIGHT_INFO;
++
++typedef struct _CAMERA_DATA
++{
++ ULONG ulVersionCode;
++ CAMERA_MODULE_INFO asCameraInfo[3]; // Assuming 3 camera sensors max
++ FLASHLIGHT_INFO asFlashInfo; // Assuming 1 flashlight max
++ DPHY_ELEC_PARA asDphyElecPara;
++ ULONG ulCrcVal; // CRC
++}CAMERA_DATA;
++
+ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_10
+ {
+ ATOM_COMMON_TABLE_HEADER sHeader;
+@@ -5883,7 +6425,7 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_10
+ USHORT usPanelRefreshRateRange;
+ UCHAR ucMemoryType;
+ UCHAR ucUMAChannelNumber;
+- UCHAR strVBIOSMsg[40];
++ ULONG ulMsgReserved[10];
+ ATOM_TDP_CONFIG asTdpConfig;
+ ULONG ulReserved[7];
+ ATOM_CLK_VOLT_CAPABILITY_V2 sDispClkVoltageMapping[8];
+@@ -5925,8 +6467,27 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_10
+ UCHAR ucEDPv1_4VSMode;
+ UCHAR ucReserved2;
+ ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo;
++ CAMERA_DATA asCameraInfo;
++ ULONG ulReserved8[29];
+ }ATOM_INTEGRATED_SYSTEM_INFO_V1_10;
+
++
++// this Table is used for Kaveri/Kabini APU
++typedef struct _ATOM_FUSION_SYSTEM_INFO_V2
++{
++ ATOM_INTEGRATED_SYSTEM_INFO_V1_8 sIntegratedSysInfo; // refer to ATOM_INTEGRATED_SYSTEM_INFO_V1_8 definition
++ ULONG ulPowerplayTable[128]; // Update comments here to link new powerplay table definition structure
++}ATOM_FUSION_SYSTEM_INFO_V2;
++
++
++typedef struct _ATOM_FUSION_SYSTEM_INFO_V3
++{
++ ATOM_INTEGRATED_SYSTEM_INFO_V1_10 sIntegratedSysInfo; // refer to ATOM_INTEGRATED_SYSTEM_INFO_V1_10 definition
++ ULONG ulPowerplayTable[192]; // Reserve 768 bytes space for PowerPlayInfoTable
++}ATOM_FUSION_SYSTEM_INFO_V3;
++
++#define FUSION_V3_OFFSET_FROM_TOP_OF_FB 0x800
++
+ /**************************************************************************/
+ // This portion is only used when ext thermal chip or engine/memory clock SS chip is populated on a design
+ //Memory SS Info Table
+@@ -6193,12 +6754,12 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3
+ #define ATOM_S3_DFP1_ACTIVE 0x00000008L
+ #define ATOM_S3_CRT2_ACTIVE 0x00000010L
+ #define ATOM_S3_LCD2_ACTIVE 0x00000020L
+-#define ATOM_S3_DFP6_ACTIVE 0x00000040L
++#define ATOM_S3_DFP6_ACTIVE 0x00000040L
+ #define ATOM_S3_DFP2_ACTIVE 0x00000080L
+ #define ATOM_S3_CV_ACTIVE 0x00000100L
+-#define ATOM_S3_DFP3_ACTIVE 0x00000200L
+-#define ATOM_S3_DFP4_ACTIVE 0x00000400L
+-#define ATOM_S3_DFP5_ACTIVE 0x00000800L
++#define ATOM_S3_DFP3_ACTIVE 0x00000200L
++#define ATOM_S3_DFP4_ACTIVE 0x00000400L
++#define ATOM_S3_DFP5_ACTIVE 0x00000800L
+
+
+ #define ATOM_S3_DEVICE_ACTIVE_MASK 0x00000FFFL
+@@ -6215,9 +6776,9 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3
+ #define ATOM_S3_DFP6_CRTC_ACTIVE 0x00400000L
+ #define ATOM_S3_DFP2_CRTC_ACTIVE 0x00800000L
+ #define ATOM_S3_CV_CRTC_ACTIVE 0x01000000L
+-#define ATOM_S3_DFP3_CRTC_ACTIVE 0x02000000L
+-#define ATOM_S3_DFP4_CRTC_ACTIVE 0x04000000L
+-#define ATOM_S3_DFP5_CRTC_ACTIVE 0x08000000L
++#define ATOM_S3_DFP3_CRTC_ACTIVE 0x02000000L
++#define ATOM_S3_DFP4_CRTC_ACTIVE 0x04000000L
++#define ATOM_S3_DFP5_CRTC_ACTIVE 0x08000000L
+
+
+ #define ATOM_S3_DEVICE_CRTC_ACTIVE_MASK 0x0FFF0000L
+@@ -6238,9 +6799,9 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3
+ #define ATOM_S3_DFP6_ACTIVEb0 0x40
+ #define ATOM_S3_DFP2_ACTIVEb0 0x80
+ #define ATOM_S3_CV_ACTIVEb1 0x01
+-#define ATOM_S3_DFP3_ACTIVEb1 0x02
+-#define ATOM_S3_DFP4_ACTIVEb1 0x04
+-#define ATOM_S3_DFP5_ACTIVEb1 0x08
++#define ATOM_S3_DFP3_ACTIVEb1 0x02
++#define ATOM_S3_DFP4_ACTIVEb1 0x04
++#define ATOM_S3_DFP5_ACTIVEb1 0x08
+
+
+ #define ATOM_S3_ACTIVE_CRTC1w0 0xFFF
+@@ -6254,9 +6815,9 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3
+ #define ATOM_S3_DFP6_CRTC_ACTIVEb2 0x40
+ #define ATOM_S3_DFP2_CRTC_ACTIVEb2 0x80
+ #define ATOM_S3_CV_CRTC_ACTIVEb3 0x01
+-#define ATOM_S3_DFP3_CRTC_ACTIVEb3 0x02
+-#define ATOM_S3_DFP4_CRTC_ACTIVEb3 0x04
+-#define ATOM_S3_DFP5_CRTC_ACTIVEb3 0x08
++#define ATOM_S3_DFP3_CRTC_ACTIVEb3 0x02
++#define ATOM_S3_DFP4_CRTC_ACTIVEb3 0x04
++#define ATOM_S3_DFP5_CRTC_ACTIVEb3 0x08
+
+
+ #define ATOM_S3_ACTIVE_CRTC2w1 0xFFF
+@@ -6878,15 +7439,18 @@ typedef struct _ATOM_MC_INIT_PARAM_TABLE_V2_1
+ #define _32Mx16 0x32
+ #define _32Mx32 0x33
+ #define _32Mx128 0x35
+-#define _64Mx32 0x43
+ #define _64Mx8 0x41
+ #define _64Mx16 0x42
++#define _64Mx32 0x43
++#define _64Mx128 0x45
+ #define _128Mx8 0x51
+ #define _128Mx16 0x52
+ #define _128Mx32 0x53
+ #define _256Mx8 0x61
+ #define _256Mx16 0x62
++#define _256Mx32 0x63
+ #define _512Mx8 0x71
++#define _512Mx16 0x72
+
+
+ #define SAMSUNG 0x1
+@@ -7407,6 +7971,17 @@ typedef struct _ATOM_MEMORY_TRAINING_INFO
+ }ATOM_MEMORY_TRAINING_INFO;
+
+
++typedef struct _ATOM_MEMORY_TRAINING_INFO_V3_1
++{
++ ATOM_COMMON_TABLE_HEADER sHeader;
++ ULONG ulMCUcodeVersion;
++ USHORT usMCIOInitLen; //len of ATOM_REG_INIT_SETTING array
++ USHORT usMCUcodeLen; //len of ATOM_MC_UCODE_DATA array
++ USHORT usMCIORegInitOffset; //offset of the ATOM_REG_INIT_SETTING array
++ USHORT usMCUcodeOffset; //offset of the MC uCode ULONG array.
++}ATOM_MEMORY_TRAINING_INFO_V3_1;
++
++
+ typedef struct SW_I2C_CNTL_DATA_PARAMETERS
+ {
+ UCHAR ucControl;
+@@ -7623,7 +8198,7 @@ typedef struct _ASIC_TRANSMITTER_INFO
+ {
+ USHORT usTransmitterObjId;
+ USHORT usSupportDevice;
+- UCHAR ucTransmitterCmdTblId;
++ UCHAR ucTransmitterCmdTblId;
+ UCHAR ucConfig;
+ UCHAR ucEncoderID; //available 1st encoder ( default )
+ UCHAR ucOptionEncoderID; //available 2nd encoder ( optional )
+diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
+index aec38fc..a461e15 100644
+--- a/drivers/gpu/drm/amd/include/cgs_common.h
++++ b/drivers/gpu/drm/amd/include/cgs_common.h
+@@ -26,6 +26,8 @@
+
+ #include "amd_shared.h"
+
++struct cgs_device;
++
+ /**
+ * enum cgs_gpu_mem_type - GPU memory types
+ */
+@@ -92,6 +94,7 @@ enum cgs_voltage_planes {
+ */
+ enum cgs_ucode_id {
+ CGS_UCODE_ID_SMU = 0,
++ CGS_UCODE_ID_SMU_SK,
+ CGS_UCODE_ID_SDMA0,
+ CGS_UCODE_ID_SDMA1,
+ CGS_UCODE_ID_CP_CE,
+@@ -111,6 +114,7 @@ enum cgs_system_info_id {
+ CGS_SYSTEM_INFO_PCIE_MLW,
+ CGS_SYSTEM_INFO_CG_FLAGS,
+ CGS_SYSTEM_INFO_PG_FLAGS,
++ CGS_SYSTEM_INFO_GFX_CU_INFO,
+ CGS_SYSTEM_INFO_ID_MAXIMUM,
+ };
+
+@@ -223,7 +227,7 @@ struct cgs_acpi_method_info {
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_gpu_mem_info_t)(void *cgs_device, enum cgs_gpu_mem_type type,
++typedef int (*cgs_gpu_mem_info_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
+ uint64_t *mc_start, uint64_t *mc_size,
+ uint64_t *mem_size);
+
+@@ -239,7 +243,7 @@ typedef int (*cgs_gpu_mem_info_t)(void *cgs_device, enum cgs_gpu_mem_type type,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_gmap_kmem_t)(void *cgs_device, void *kmem, uint64_t size,
++typedef int (*cgs_gmap_kmem_t)(struct cgs_device *cgs_device, void *kmem, uint64_t size,
+ uint64_t min_offset, uint64_t max_offset,
+ cgs_handle_t *kmem_handle, uint64_t *mcaddr);
+
+@@ -250,7 +254,7 @@ typedef int (*cgs_gmap_kmem_t)(void *cgs_device, void *kmem, uint64_t size,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_gunmap_kmem_t)(void *cgs_device, cgs_handle_t kmem_handle);
++typedef int (*cgs_gunmap_kmem_t)(struct cgs_device *cgs_device, cgs_handle_t kmem_handle);
+
+ /**
+ * cgs_alloc_gpu_mem() - Allocate GPU memory
+@@ -279,7 +283,7 @@ typedef int (*cgs_gunmap_kmem_t)(void *cgs_device, cgs_handle_t kmem_handle);
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_alloc_gpu_mem_t)(void *cgs_device, enum cgs_gpu_mem_type type,
++typedef int (*cgs_alloc_gpu_mem_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type,
+ uint64_t size, uint64_t align,
+ uint64_t min_offset, uint64_t max_offset,
+ cgs_handle_t *handle);
+@@ -291,7 +295,7 @@ typedef int (*cgs_alloc_gpu_mem_t)(void *cgs_device, enum cgs_gpu_mem_type type,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_free_gpu_mem_t)(void *cgs_device, cgs_handle_t handle);
++typedef int (*cgs_free_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle);
+
+ /**
+ * cgs_gmap_gpu_mem() - GPU-map GPU memory
+@@ -303,7 +307,7 @@ typedef int (*cgs_free_gpu_mem_t)(void *cgs_device, cgs_handle_t handle);
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_gmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle,
++typedef int (*cgs_gmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle,
+ uint64_t *mcaddr);
+
+ /**
+@@ -315,7 +319,7 @@ typedef int (*cgs_gmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_gunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle);
++typedef int (*cgs_gunmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle);
+
+ /**
+ * cgs_kmap_gpu_mem() - Kernel-map GPU memory
+@@ -326,7 +330,7 @@ typedef int (*cgs_gunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle);
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_kmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle,
++typedef int (*cgs_kmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle,
+ void **map);
+
+ /**
+@@ -336,7 +340,7 @@ typedef int (*cgs_kmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_kunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle);
++typedef int (*cgs_kunmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle);
+
+ /**
+ * cgs_read_register() - Read an MMIO register
+@@ -345,7 +349,7 @@ typedef int (*cgs_kunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle);
+ *
+ * Return: register value
+ */
+-typedef uint32_t (*cgs_read_register_t)(void *cgs_device, unsigned offset);
++typedef uint32_t (*cgs_read_register_t)(struct cgs_device *cgs_device, unsigned offset);
+
+ /**
+ * cgs_write_register() - Write an MMIO register
+@@ -353,7 +357,7 @@ typedef uint32_t (*cgs_read_register_t)(void *cgs_device, unsigned offset);
+ * @offset: register offset
+ * @value: register value
+ */
+-typedef void (*cgs_write_register_t)(void *cgs_device, unsigned offset,
++typedef void (*cgs_write_register_t)(struct cgs_device *cgs_device, unsigned offset,
+ uint32_t value);
+
+ /**
+@@ -363,7 +367,7 @@ typedef void (*cgs_write_register_t)(void *cgs_device, unsigned offset,
+ *
+ * Return: register value
+ */
+-typedef uint32_t (*cgs_read_ind_register_t)(void *cgs_device, enum cgs_ind_reg space,
++typedef uint32_t (*cgs_read_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space,
+ unsigned index);
+
+ /**
+@@ -372,7 +376,7 @@ typedef uint32_t (*cgs_read_ind_register_t)(void *cgs_device, enum cgs_ind_reg s
+ * @offset: register offset
+ * @value: register value
+ */
+-typedef void (*cgs_write_ind_register_t)(void *cgs_device, enum cgs_ind_reg space,
++typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space,
+ unsigned index, uint32_t value);
+
+ /**
+@@ -382,7 +386,7 @@ typedef void (*cgs_write_ind_register_t)(void *cgs_device, enum cgs_ind_reg spac
+ *
+ * Return: Value read
+ */
+-typedef uint8_t (*cgs_read_pci_config_byte_t)(void *cgs_device, unsigned addr);
++typedef uint8_t (*cgs_read_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr);
+
+ /**
+ * cgs_read_pci_config_word() - Read word from PCI configuration space
+@@ -391,7 +395,7 @@ typedef uint8_t (*cgs_read_pci_config_byte_t)(void *cgs_device, unsigned addr);
+ *
+ * Return: Value read
+ */
+-typedef uint16_t (*cgs_read_pci_config_word_t)(void *cgs_device, unsigned addr);
++typedef uint16_t (*cgs_read_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr);
+
+ /**
+ * cgs_read_pci_config_dword() - Read dword from PCI configuration space
+@@ -400,7 +404,7 @@ typedef uint16_t (*cgs_read_pci_config_word_t)(void *cgs_device, unsigned addr);
+ *
+ * Return: Value read
+ */
+-typedef uint32_t (*cgs_read_pci_config_dword_t)(void *cgs_device,
++typedef uint32_t (*cgs_read_pci_config_dword_t)(struct cgs_device *cgs_device,
+ unsigned addr);
+
+ /**
+@@ -409,7 +413,7 @@ typedef uint32_t (*cgs_read_pci_config_dword_t)(void *cgs_device,
+ * @addr: address
+ * @value: value to write
+ */
+-typedef void (*cgs_write_pci_config_byte_t)(void *cgs_device, unsigned addr,
++typedef void (*cgs_write_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr,
+ uint8_t value);
+
+ /**
+@@ -418,7 +422,7 @@ typedef void (*cgs_write_pci_config_byte_t)(void *cgs_device, unsigned addr,
+ * @addr: address, must be word-aligned
+ * @value: value to write
+ */
+-typedef void (*cgs_write_pci_config_word_t)(void *cgs_device, unsigned addr,
++typedef void (*cgs_write_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr,
+ uint16_t value);
+
+ /**
+@@ -427,7 +431,7 @@ typedef void (*cgs_write_pci_config_word_t)(void *cgs_device, unsigned addr,
+ * @addr: address, must be dword-aligned
+ * @value: value to write
+ */
+-typedef void (*cgs_write_pci_config_dword_t)(void *cgs_device, unsigned addr,
++typedef void (*cgs_write_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr,
+ uint32_t value);
+
+
+@@ -441,7 +445,7 @@ typedef void (*cgs_write_pci_config_dword_t)(void *cgs_device, unsigned addr,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_get_pci_resource_t)(void *cgs_device,
++typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device,
+ enum cgs_resource_type resource_type,
+ uint64_t size,
+ uint64_t offset,
+@@ -458,7 +462,7 @@ typedef int (*cgs_get_pci_resource_t)(void *cgs_device,
+ * Return: Pointer to start of the table, or NULL on failure
+ */
+ typedef const void *(*cgs_atom_get_data_table_t)(
+- void *cgs_device, unsigned table,
++ struct cgs_device *cgs_device, unsigned table,
+ uint16_t *size, uint8_t *frev, uint8_t *crev);
+
+ /**
+@@ -470,7 +474,7 @@ typedef const void *(*cgs_atom_get_data_table_t)(
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_atom_get_cmd_table_revs_t)(void *cgs_device, unsigned table,
++typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table,
+ uint8_t *frev, uint8_t *crev);
+
+ /**
+@@ -481,7 +485,7 @@ typedef int (*cgs_atom_get_cmd_table_revs_t)(void *cgs_device, unsigned table,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_atom_exec_cmd_table_t)(void *cgs_device,
++typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device,
+ unsigned table, void *args);
+
+ /**
+@@ -491,7 +495,7 @@ typedef int (*cgs_atom_exec_cmd_table_t)(void *cgs_device,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_create_pm_request_t)(void *cgs_device, cgs_handle_t *request);
++typedef int (*cgs_create_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t *request);
+
+ /**
+ * cgs_destroy_pm_request() - Destroy a power management request
+@@ -500,7 +504,7 @@ typedef int (*cgs_create_pm_request_t)(void *cgs_device, cgs_handle_t *request);
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_destroy_pm_request_t)(void *cgs_device, cgs_handle_t request);
++typedef int (*cgs_destroy_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request);
+
+ /**
+ * cgs_set_pm_request() - Activate or deactiveate a PM request
+@@ -516,7 +520,7 @@ typedef int (*cgs_destroy_pm_request_t)(void *cgs_device, cgs_handle_t request);
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_set_pm_request_t)(void *cgs_device, cgs_handle_t request,
++typedef int (*cgs_set_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request,
+ int active);
+
+ /**
+@@ -528,7 +532,7 @@ typedef int (*cgs_set_pm_request_t)(void *cgs_device, cgs_handle_t request,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_pm_request_clock_t)(void *cgs_device, cgs_handle_t request,
++typedef int (*cgs_pm_request_clock_t)(struct cgs_device *cgs_device, cgs_handle_t request,
+ enum cgs_clock clock, unsigned freq);
+
+ /**
+@@ -540,7 +544,7 @@ typedef int (*cgs_pm_request_clock_t)(void *cgs_device, cgs_handle_t request,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_pm_request_engine_t)(void *cgs_device, cgs_handle_t request,
++typedef int (*cgs_pm_request_engine_t)(struct cgs_device *cgs_device, cgs_handle_t request,
+ enum cgs_engine engine, int powered);
+
+ /**
+@@ -551,7 +555,7 @@ typedef int (*cgs_pm_request_engine_t)(void *cgs_device, cgs_handle_t request,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_pm_query_clock_limits_t)(void *cgs_device,
++typedef int (*cgs_pm_query_clock_limits_t)(struct cgs_device *cgs_device,
+ enum cgs_clock clock,
+ struct cgs_clock_limits *limits);
+
+@@ -563,7 +567,7 @@ typedef int (*cgs_pm_query_clock_limits_t)(void *cgs_device,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_set_camera_voltages_t)(void *cgs_device, uint32_t mask,
++typedef int (*cgs_set_camera_voltages_t)(struct cgs_device *cgs_device, uint32_t mask,
+ const uint32_t *voltages);
+ /**
+ * cgs_get_firmware_info - Get the firmware information from core driver
+@@ -573,23 +577,25 @@ typedef int (*cgs_set_camera_voltages_t)(void *cgs_device, uint32_t mask,
+ *
+ * Return: 0 on success, -errno otherwise
+ */
+-typedef int (*cgs_get_firmware_info)(void *cgs_device,
++typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device,
+ enum cgs_ucode_id type,
+ struct cgs_firmware_info *info);
+
+-typedef int(*cgs_set_powergating_state)(void *cgs_device,
++typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state);
+
+-typedef int(*cgs_set_clockgating_state)(void *cgs_device,
++typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state);
+
+ typedef int(*cgs_get_active_displays_info)(
+- void *cgs_device,
++ struct cgs_device *cgs_device,
+ struct cgs_display_info *info);
+
+-typedef int (*cgs_call_acpi_method)(void *cgs_device,
++typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled);
++
++typedef int (*cgs_call_acpi_method)(struct cgs_device *cgs_device,
+ uint32_t acpi_method,
+ uint32_t acpi_function,
+ void *pinput, void *poutput,
+@@ -597,7 +603,7 @@ typedef int (*cgs_call_acpi_method)(void *cgs_device,
+ uint32_t input_size,
+ uint32_t output_size);
+
+-typedef int (*cgs_query_system_info)(void *cgs_device,
++typedef int (*cgs_query_system_info)(struct cgs_device *cgs_device,
+ struct cgs_system_info *sys_info);
+
+ struct cgs_ops {
+@@ -644,6 +650,8 @@ struct cgs_ops {
+ cgs_set_clockgating_state set_clockgating_state;
+ /* display manager */
+ cgs_get_active_displays_info get_active_displays_info;
++ /* notify dpm enabled */
++ cgs_notify_dpm_enabled notify_dpm_enabled;
+ /* ACPI */
+ cgs_call_acpi_method call_acpi_method;
+ /* get system info */
+@@ -734,8 +742,12 @@ struct cgs_device
+ CGS_CALL(set_powergating_state, dev, block_type, state)
+ #define cgs_set_clockgating_state(dev, block_type, state) \
+ CGS_CALL(set_clockgating_state, dev, block_type, state)
++#define cgs_notify_dpm_enabled(dev, enabled) \
++ CGS_CALL(notify_dpm_enabled, dev, enabled)
++
+ #define cgs_get_active_displays_info(dev, info) \
+ CGS_CALL(get_active_displays_info, dev, info)
++
+ #define cgs_call_acpi_method(dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size) \
+ CGS_CALL(call_acpi_method, dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size)
+ #define cgs_query_system_info(dev, sys_info) \
+--
+1.9.1
+
diff --git a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc
index f64b5f4d..eb04733b 100644
--- a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc
+++ b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc
@@ -1461,5 +1461,6 @@ patch 1569-ACP-code-as-per-4-1-0-kernel.patch
patch 1570-4-1-0-kernel-s-drivers-I2C-directory.patch
patch 1571-Add-support-for-amd-gnb-bus.patch
patch 1572-drm-amdgpu-fix-num_rbs-exposed-to-userspace.patch
+patch 1573-Add-power-gating-initialization-support-for-GFX8.0.patch
patch 0300-amd-powerplay-handle-power-management-state-based-on.patch
patch 0001-ALSA-hda-add-AMD-Stoney-PCI-ID-with-proper-driver-ca.patch