diff options
Diffstat (limited to 'common')
9 files changed, 10019 insertions, 0 deletions
diff --git a/common/recipes-core/initrdscripts/files/0009-init-install.sh-etc-mtab-make-a-softlink-rather-than.patch b/common/recipes-core/initrdscripts/files/0009-init-install.sh-etc-mtab-make-a-softlink-rather-than.patch new file mode 100644 index 00000000..1ec0f222 --- /dev/null +++ b/common/recipes-core/initrdscripts/files/0009-init-install.sh-etc-mtab-make-a-softlink-rather-than.patch @@ -0,0 +1,35 @@ +From 8ae71c99ce763f9ce49afe25f5569931dfd6cd1e Mon Sep 17 00:00:00 2001 +From: Awais Belal <awais_belal@mentor.com> +Date: Thu, 28 Jul 2016 13:24:52 +0500 +Subject: [PATCH] init-install.sh: /etc/mtab make a softlink rather than a copy + +Using a copy would only make management of devices erroneous +and makes the system unstable in some scenarios as tools will +have to manipulate both files separately. A link ensures that +both files /proc/mounts and /etc/mtab will have the same +information at all times and this is how it is handled +on newer systems where there is such a need. + +Signed-off-by: Awais Belal <awais_belal@mentor.com> +--- + init-install.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git init-install.sh init-install.sh +index 1b69b71..5b8f925 100644 +--- init-install.sh ++++ init-install.sh +@@ -122,8 +122,8 @@ if [ ! -b /dev/loop0 ] ; then + fi + + mkdir -p /tmp +-if [ ! -L /etc/mtab ]; then +- cat /proc/mounts > /etc/mtab ++if [ ! 
-L /etc/mtab ] && [ -e /proc/mounts ]; then ++ ln -sf /proc/mounts /etc/mtab + fi + + disk_size=$(parted ${device} unit mb print | grep Disk | cut -d" " -f 3 | sed -e "s/MB//") +-- +1.9.1 + diff --git a/common/recipes-core/initrdscripts/files/0010-init-install-efi.sh-etc-mtab-make-a-softlink-rather-.patch b/common/recipes-core/initrdscripts/files/0010-init-install-efi.sh-etc-mtab-make-a-softlink-rather-.patch new file mode 100644 index 00000000..053ef092 --- /dev/null +++ b/common/recipes-core/initrdscripts/files/0010-init-install-efi.sh-etc-mtab-make-a-softlink-rather-.patch @@ -0,0 +1,36 @@ +From 172e6d8c4e5cb8746b87130bbd6d38a60248b343 Mon Sep 17 00:00:00 2001 +From: Awais Belal <awais_belal@mentor.com> +Date: Thu, 28 Jul 2016 13:35:01 +0500 +Subject: [PATCH] init-install-efi.sh: /etc/mtab make a softlink rather than a + copy + +Using a copy would only make management of devices erroneous +and makes the system unstable in some scenarios as tools will +have to manipulate both files separately. A link ensures that +both files /proc/mounts and /etc/mtab will have the same +information at all times and this is how it is handled +on newer systems where there is such a need. + +Signed-off-by: Awais Belal <awais_belal@mentor.com> +--- + init-install-efi.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git init-install-efi.sh init-install-efi.sh +index a6a9047..39e8806 100644 +--- init-install-efi.sh ++++ init-install-efi.sh +@@ -126,8 +126,8 @@ umount ${device}* 2> /dev/null || /bin/true + mkdir -p /tmp + + # Create /etc/mtab if not present +-if [ ! -e /etc/mtab ]; then +- cat /proc/mounts > /etc/mtab ++if [ ! 
-e /etc/mtab ] && [ -e /proc/mounts ]; then ++ ln -sf /proc/mounts /etc/mtab + fi + + disk_size=$(parted ${device} unit mb print | grep Disk | cut -d" " -f 3 | sed -e "s/MB//") +-- +1.9.1 + diff --git a/common/recipes-core/initrdscripts/initramfs-live-install-efi_1.0.bbappend b/common/recipes-core/initrdscripts/initramfs-live-install-efi_1.0.bbappend index 752982b5..b51ea937 100644 --- a/common/recipes-core/initrdscripts/initramfs-live-install-efi_1.0.bbappend +++ b/common/recipes-core/initrdscripts/initramfs-live-install-efi_1.0.bbappend @@ -2,4 +2,5 @@ FILESEXTRAPATHS_prepend := "${THISDIR}/files:" SRC_URI_append_amd = " \ file://0003-init-install-efi.sh-Don-t-set-quiet-kernel-option-in.patch;striplevel=0;patchdir=${WORKDIR} \ file://0004-init-install-efi.sh-Add-a-second-prompt-to-install.patch;striplevel=0;patchdir=${WORKDIR} \ + file://0010-init-install-efi.sh-etc-mtab-make-a-softlink-rather-.patch;striplevel=0;patchdir=${WORKDIR} \ " diff --git a/common/recipes-core/initrdscripts/initramfs-live-install_1.0.bbappend b/common/recipes-core/initrdscripts/initramfs-live-install_1.0.bbappend index 45ccedbd..14956745 100644 --- a/common/recipes-core/initrdscripts/initramfs-live-install_1.0.bbappend +++ b/common/recipes-core/initrdscripts/initramfs-live-install_1.0.bbappend @@ -2,4 +2,5 @@ FILESEXTRAPATHS_prepend := "${THISDIR}/files:" SRC_URI_append_amd = " \ file://0001-init-install.sh-Don-t-set-quiet-kernel-option-in-ins.patch;striplevel=0;patchdir=${WORKDIR} \ file://0002-init-install.sh-Add-a-second-prompt-to-install.patch;striplevel=0;patchdir=${WORKDIR} \ + file://0009-init-install.sh-etc-mtab-make-a-softlink-rather-than.patch;striplevel=0;patchdir=${WORKDIR} \ " diff --git a/common/recipes-graphics/xinput-calibrator/files/add-geometry-input-when-calibrating.patch b/common/recipes-graphics/xinput-calibrator/files/add-geometry-input-when-calibrating.patch new file mode 100644 index 00000000..cfc56529 --- /dev/null +++ 
b/common/recipes-graphics/xinput-calibrator/files/add-geometry-input-when-calibrating.patch @@ -0,0 +1,32 @@ +Upstream-Status: Inappropriate [no longer maintained] + +From 93abf28d602da637376b78de8c88b7ab5cf13b4f Mon Sep 17 00:00:00 2001 +From: Jonathan David <jonathan.david@ni.com> +Date: Mon, 30 Nov 2015 12:12:20 -0600 +Subject: [PATCH] add geometry input when calibrating + +Send monitor geometry to xinput_calibrator when running the script + +Signed-off-by: Jonathan David <jonathan.david@ni.com> +--- + scripts/xinput_calibrator_pointercal.sh | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/scripts/xinput_calibrator_pointercal.sh b/scripts/xinput_calibrator_pointercal.sh +index 0ada7da..6a3aded 100755 +--- a/scripts/xinput_calibrator_pointercal.sh ++++ b/scripts/xinput_calibrator_pointercal.sh +@@ -37,7 +37,10 @@ done + + [ "$USER" != "root" ] && CALFILE=$USER_CALFILE + +-CALDATA=`$BINARY --output-type xinput -v | tee $LOGFILE | grep ' xinput set' | sed 's/^ //g; s/$/;/g'` ++read RESOLUTION <<< $(xrandr | awk -F '[[:space:]+]' '/ connected/ \ ++ { if ($3 != "primary") print $3; if ($3 == "primary") print $4 }') ++ ++CALDATA=`$BINARY --geometry $RESOLUTION --output-type xinput -v | tee $LOGFILE | grep ' xinput set' | sed 's/^ //g; s/$/;/g'` + if [ ! 
-z "$CALDATA" ] ; then + echo $CALDATA > $CALFILE + echo "Calibration data stored in $CALFILE (log in $LOGFILE)" +-- +1.9.1 diff --git a/common/recipes-graphics/xinput-calibrator/xinput-calibrator_git.bbappend b/common/recipes-graphics/xinput-calibrator/xinput-calibrator_git.bbappend new file mode 100644 index 00000000..98ae3132 --- /dev/null +++ b/common/recipes-graphics/xinput-calibrator/xinput-calibrator_git.bbappend @@ -0,0 +1,8 @@ +FILESEXTRAPATHS_prepend := "${THISDIR}/files:" +SRC_URI_append_amd = " file://add-geometry-input-when-calibrating.patch" +RDEPENDS_${PN}_append_amd = " xrandr bash" + +do_install_append_amd() { + # Do not install the boot time auto launcher + rm -rf ${D}${sysconfdir}/xdg/autostart +} diff --git a/common/recipes-kernel/linux/files/0001-fs-prioritize-ext4-rootfs-type.patch b/common/recipes-kernel/linux/files/0001-fs-prioritize-ext4-rootfs-type.patch new file mode 100644 index 00000000..d70a577a --- /dev/null +++ b/common/recipes-kernel/linux/files/0001-fs-prioritize-ext4-rootfs-type.patch @@ -0,0 +1,35 @@ +From 00a74e44dc3f7656f23719aa978d035a2b33c038 Mon Sep 17 00:00:00 2001 +From: Awais Belal <awais_belal@mentor.com> +Date: Fri, 22 Jul 2016 18:54:39 +0500 +Subject: [PATCH] fs: prioritize ext4 rootfs type + +This would allow us to go directly to ext4 type rootfs checks +otherwise ext3 and ext2 will be tried first. This gives a +performance benefit in our case where our RootFS is known +to be ext4. 
+ +Signed-off-by: Awais Belal <awais_belal@mentor.com> +--- + fs/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/Makefile b/fs/Makefile +index 5fa9520..642771a 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -62,11 +62,11 @@ obj-$(CONFIG_DLM) += dlm/ + # Do not add any filesystems before this line + obj-$(CONFIG_FSCACHE) += fscache/ + obj-$(CONFIG_REISERFS_FS) += reiserfs/ ++obj-$(CONFIG_EXT4_FS) += ext4/ + obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 + obj-$(CONFIG_EXT2_FS) += ext2/ + # We place ext4 after ext2 so plain ext2 root fs's are mounted using ext2 + # unless explicitly requested by rootfstype +-obj-$(CONFIG_EXT4_FS) += ext4/ + obj-$(CONFIG_JBD) += jbd/ + obj-$(CONFIG_JBD2) += jbd2/ + obj-$(CONFIG_CRAMFS) += cramfs/ +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch b/common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch new file mode 100644 index 00000000..be4fb9fa --- /dev/null +++ b/common/recipes-kernel/linux/files/1573-Add-power-gating-initialization-support-for-GFX8.0.patch @@ -0,0 +1,9869 @@ +From c712d1b59efd1e29dd032871a3a34cbfd0c7af75 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 20 Jul 2016 14:20:32 +0530 +Subject: [PATCH] Add power gating initialization support for GFX8.0 + + - Enable GFX PG on CZ. + Tested with xonotic-glx/glxgears/supertuxkart and idle desktop. + Also read-back registers via umr for verification that the bits + are truly enabled. + - Enable CG for UVD6 on Carrizo + - Use dal driver for CZ + - Enable gmc clockgating for CZ + - Enable gfx clockgating for CZ + - Fetch the values from atom rather than hardcoding them in the + driver. 
+ - Fetch cu_info once at init and just store the results + for future requests. + - fix memory leak in CGS (FW info). Previously requested FW + pointer should not be overwritten. + - add query GFX cu info in CGS query system info. + Needed for per CU powergating. + - add a cgs interface to notify amdgpu the dpm state. + - fix segment fault issue in multi-display case. + - keep vm in job instead of ib. + ib.vm is a legacy way to get vm, after scheduler + implemented vm should be get from job, and all ibs + from one job share the same vm, no need to keep ib.vm + just move vm field to job. + this patch as well add job as parameter to ib_schedule + so it can get vm from job->vm. + - remove sorting of CS BOs. Not needed any more. + - create fence slab once when amdgpu module init. + v2: add functions for init/exit instead of moving the + variables into the driver. + - fence wait old rcu slot. + since the rcu slot was initialized to be num_hw_submission, + if command submission doesn't use scheduler, this limitation + will be invalid like uvd test. + - v2: recreate from scratch, avoid all unnecessary changes. + - Should always flush & invalidate hdp no matter vm used or not. + - use ctx pointer is not safe, cuz they are likely already + be assigned to another ctx when doing comparing. + fence_context is always increasing and have rare chance + to overback to used number for jobs that scheduled to + ring continuously + - use a sync object for VMID fences. This way we can store more than + one fence as user for each VMID. + - forbid mapping of userptr bo through radeon device file. + Allowing userptr bo which are basically a list of page from some vma + (so either anonymous page or file backed page) would lead to serious + corruption of kernel structures and counters (because we overwrite + the page->mapping field when mapping buffer). 
+ This will already block if the buffer was populated before anyone does + try to mmap it because then TTM_PAGE_FLAG_SG would be set in the + ttm_tt flags. But that flag is checked before ttm_tt_populate in the ttm + vm fault handler. + So to be safe just add a check to verify_access() callback. + - group BOs by log2 of the size on the LRU v2. + This allows us to have small BOs on the LRU before big ones. + - implement LRU add callbacks v2. + This allows fine grained control for the driver where to add a BO into the LRU. + - Mark all instances of struct drm_info_list as const. + All these are compile time constant and the + drm_debugfs_create/remove_files functions take a const + pointer argument. + - Don't move pinned BOs. The purpose of pinning is to prevent a buffer from moving. + - when suspending, if uvd/vce was running. need to cancel delay work. + This fixes the issue that when resume back, uvd/vce dpm was disabled + and uvd/vce's performance dropped. + +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/Kconfig | 10 +- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 197 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 30 + + drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 2 + + drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 176 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 127 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 296 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 43 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 39 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 53 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 36 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 87 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 30 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 32 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 160 +- + 
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 105 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 29 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 145 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 24 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 450 ++--- + drivers/gpu/drm/amd/amdgpu/atom.h | 2 +- + drivers/gpu/drm/amd/amdgpu/atombios_crtc.c | 98 +- + drivers/gpu/drm/amd/amdgpu/atombios_crtc.h | 2 + + drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 97 +- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1946 ++++++++++++++++---- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | 1 - + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 182 +- + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 9 +- + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 223 +-- + drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h | 1 + + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 3 +- + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 3 +- + drivers/gpu/drm/amd/amdgpu/vi.c | 282 ++- + drivers/gpu/drm/amd/include/amd_shared.h | 7 + + .../gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h | 2 + + drivers/gpu/drm/amd/include/atombios.h | 663 ++++++- + drivers/gpu/drm/amd/include/cgs_common.h | 84 +- + 42 files changed, 4256 insertions(+), 1456 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig +index 27f2000..3e29c86 100644 +--- a/drivers/gpu/drm/amd/amdgpu/Kconfig ++++ b/drivers/gpu/drm/amd/amdgpu/Kconfig +@@ -16,7 +16,15 @@ config DRM_AMDGPU_USERPTR + This option selects CONFIG_MMU_NOTIFIER if it isn't already + selected to enabled full userptr support. + ++config DRM_AMDGPU_GART_DEBUGFS ++ bool "Allow GART access through debugfs" ++ depends on DRM_AMDGPU ++ depends on DEBUG_FS ++ default n ++ help ++ Selecting this option creates a debugfs file to inspect the mapped ++ pages. Uses more memory for housekeeping, enable only for debugging. 
++ + source "drivers/gpu/drm/amd/powerplay/Kconfig" + source "drivers/gpu/drm/amd/acp/Kconfig" + source "drivers/gpu/drm/amd/dal/Kconfig" +- +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 80d5cef..0873301 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -285,7 +285,8 @@ struct amdgpu_ring_funcs { + int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); + /* command emit functions */ + void (*emit_ib)(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib); ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch); + void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, + uint64_t seq, unsigned flags); + void (*emit_pipeline_sync)(struct amdgpu_ring *ring); +@@ -369,13 +370,6 @@ struct amdgpu_fence_driver { + #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) + #define AMDGPU_FENCE_FLAG_INT (1 << 1) + +-struct amdgpu_user_fence { +- /* write-back bo */ +- struct amdgpu_bo *bo; +- /* write-back address offset to bo start */ +- uint32_t offset; +-}; +- + int amdgpu_fence_driver_init(struct amdgpu_device *adev); + void amdgpu_fence_driver_fini(struct amdgpu_device *adev); + void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); +@@ -395,6 +389,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); + /* + * TTM. 
+ */ ++ ++#define AMDGPU_TTM_LRU_SIZE 20 ++ ++struct amdgpu_mman_lru { ++ struct list_head *lru[TTM_NUM_MEM_TYPES]; ++ struct list_head *swap_lru; ++}; ++ + struct amdgpu_mman { + struct ttm_bo_global_ref bo_global_ref; + struct drm_global_reference mem_global_ref; +@@ -412,6 +414,9 @@ struct amdgpu_mman { + struct amdgpu_ring *buffer_funcs_ring; + /* Scheduler entity for buffer moves */ + struct amd_sched_entity entity; ++ ++ /* custom LRU management */ ++ struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE]; + }; + + int amdgpu_copy_buffer(struct amdgpu_ring *ring, +@@ -440,7 +445,6 @@ struct amdgpu_bo_va_mapping { + + /* bo virtual addresses in a specific vm */ + struct amdgpu_bo_va { +- struct mutex mutex; + /* protected by bo being reserved */ + struct list_head bo_list; + struct fence *last_pt_update; +@@ -499,9 +503,10 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv); + unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); + struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +-struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, +- struct dma_buf_attachment *attach, +- struct sg_table *sg); ++struct drm_gem_object * ++amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ++ struct dma_buf_attachment *attach, ++ struct sg_table *sg); + struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); +@@ -591,11 +596,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, + struct amdgpu_sync *sync, + struct reservation_object *resv, + void *owner); ++bool amdgpu_sync_is_idle(struct amdgpu_sync *sync); ++int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src, ++ struct fence *fence); + struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); + int amdgpu_sync_wait(struct amdgpu_sync *sync); + void amdgpu_sync_free(struct amdgpu_sync *sync); + int amdgpu_sync_init(void); + void 
amdgpu_sync_fini(void); ++int amdgpu_fence_slab_init(void); ++void amdgpu_fence_slab_fini(void); + + /* + * GART structures, functions & helpers +@@ -614,8 +624,9 @@ struct amdgpu_gart { + unsigned num_gpu_pages; + unsigned num_cpu_pages; + unsigned table_size; ++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + struct page **pages; +- dma_addr_t *pages_addr; ++#endif + bool ready; + const struct amdgpu_gart_funcs *gart_funcs; + }; +@@ -714,6 +725,7 @@ struct amdgpu_flip_work { + unsigned shared_count; + struct fence **shared; + struct fence_cb cb; ++ bool async; + }; + + +@@ -726,17 +738,7 @@ struct amdgpu_ib { + uint32_t length_dw; + uint64_t gpu_addr; + uint32_t *ptr; +- struct amdgpu_user_fence *user; +- struct amdgpu_vm *vm; +- unsigned vm_id; +- uint64_t vm_pd_addr; +- struct amdgpu_ctx *ctx; +- uint32_t gds_base, gds_size; +- uint32_t gws_base, gws_size; +- uint32_t oa_base, oa_size; + uint32_t flags; +- /* resulting sequence number */ +- uint64_t sequence; + }; + + enum amdgpu_ring_type { +@@ -750,7 +752,7 @@ enum amdgpu_ring_type { + extern struct amd_sched_backend_ops amdgpu_sched_ops; + + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, +- struct amdgpu_job **job); ++ struct amdgpu_job **job, struct amdgpu_vm *vm); + int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, + struct amdgpu_job **job); + +@@ -765,7 +767,7 @@ struct amdgpu_ring { + struct amdgpu_device *adev; + const struct amdgpu_ring_funcs *funcs; + struct amdgpu_fence_driver fence_drv; +- struct amd_gpu_scheduler sched; ++ struct amd_gpu_scheduler sched; + + spinlock_t fence_lock; + struct amdgpu_bo *ring_obj; +@@ -793,7 +795,7 @@ struct amdgpu_ring { + unsigned wptr_offs; + unsigned next_rptr_offs; + unsigned fence_offs; +- struct amdgpu_ctx *current_ctx; ++ uint64_t current_ctx; + enum amdgpu_ring_type type; + char name[16]; + unsigned cond_exe_offs; +@@ -841,16 +843,8 @@ struct amdgpu_vm_pt { + uint64_t addr; + }; + +-struct amdgpu_vm_id { +- struct 
amdgpu_vm_manager_id *mgr_id; +- uint64_t pd_gpu_addr; +- /* last flushed PD/PT update */ +- struct fence *flushed_updates; +-}; +- + struct amdgpu_vm { + /* tree of virtual addresses mapped */ +- spinlock_t it_lock; + struct rb_root va; + + /* protecting invalidated */ +@@ -874,19 +868,29 @@ struct amdgpu_vm { + struct amdgpu_vm_pt *page_tables; + + /* for id and flush management per ring */ +- struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; ++ struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS]; + + /* protecting freed */ + spinlock_t freed_lock; + + /* Scheduler entity for page table updates */ + struct amd_sched_entity entity; ++ ++ /* client id */ ++ u64 client_id; + }; + +-struct amdgpu_vm_manager_id { ++struct amdgpu_vm_id { + struct list_head list; +- struct fence *active; +- atomic_long_t owner; ++ struct fence *first; ++ struct amdgpu_sync active; ++ struct fence *last_flush; ++ struct amdgpu_ring *last_user; ++ atomic64_t owner; ++ ++ uint64_t pd_gpu_addr; ++ /* last flushed PD/PT update */ ++ struct fence *flushed_updates; + + uint32_t gds_base; + uint32_t gds_size; +@@ -901,7 +905,7 @@ struct amdgpu_vm_manager { + struct mutex lock; + unsigned num_ids; + struct list_head ids_lru; +- struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM]; ++ struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; + + uint32_t max_pfn; + /* vram base address for page table entry */ +@@ -913,6 +917,8 @@ struct amdgpu_vm_manager { + struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; + unsigned vm_pte_num_rings; + atomic_t vm_pte_next_ring; ++ /* client id counter */ ++ atomic64_t client_counter; + }; + + void amdgpu_vm_manager_init(struct amdgpu_device *adev); +@@ -928,11 +934,11 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, + int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct fence *fence, + unsigned *vm_id, uint64_t *vm_pd_addr); +-void amdgpu_vm_flush(struct amdgpu_ring *ring, +- unsigned vm_id, uint64_t pd_addr, +- uint32_t gds_base, 
uint32_t gds_size, +- uint32_t gws_base, uint32_t gws_size, +- uint32_t oa_base, uint32_t oa_size); ++int amdgpu_vm_flush(struct amdgpu_ring *ring, ++ unsigned vm_id, uint64_t pd_addr, ++ uint32_t gds_base, uint32_t gds_size, ++ uint32_t gws_base, uint32_t gws_size, ++ uint32_t oa_base, uint32_t oa_size); + void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); + uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); + int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, +@@ -1038,6 +1044,11 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list); + */ + #include "clearstate_defs.h" + ++struct amdgpu_rlc_funcs { ++ void (*enter_safe_mode)(struct amdgpu_device *adev); ++ void (*exit_safe_mode)(struct amdgpu_device *adev); ++}; ++ + struct amdgpu_rlc { + /* for power gating */ + struct amdgpu_bo *save_restore_obj; +@@ -1056,6 +1067,24 @@ struct amdgpu_rlc { + uint64_t cp_table_gpu_addr; + volatile uint32_t *cp_table_ptr; + u32 cp_table_size; ++ ++ /* safe mode for updating CG/PG state */ ++ bool in_safe_mode; ++ const struct amdgpu_rlc_funcs *funcs; ++ ++ /* for firmware data */ ++ u32 save_and_restore_offset; ++ u32 clear_state_descriptor_offset; ++ u32 avail_scratch_ram_locations; ++ u32 reg_restore_list_size; ++ u32 reg_list_format_start; ++ u32 reg_list_format_separate_start; ++ u32 starting_offsets_start; ++ u32 reg_list_format_size_bytes; ++ u32 reg_list_size_bytes; ++ ++ u32 *register_list_format; ++ u32 *register_restore; + }; + + struct amdgpu_mec { +@@ -1109,6 +1138,12 @@ struct amdgpu_gca_config { + uint32_t macrotile_mode_array[16]; + }; + ++struct amdgpu_cu_info { ++ uint32_t number; /* total active CU number */ ++ uint32_t ao_cu_mask; ++ uint32_t bitmap[4][4]; ++}; ++ + struct amdgpu_gfx { + struct mutex gpu_clock_mutex; + struct amdgpu_gca_config config; +@@ -1141,17 +1176,19 @@ struct amdgpu_gfx { + struct amdgpu_irq_src priv_reg_irq; + struct amdgpu_irq_src priv_inst_irq; + /* gfx status */ +- uint32_t 
gfx_current_status; ++ uint32_t gfx_current_status; + /* ce ram size*/ +- unsigned ce_ram_size; ++ unsigned ce_ram_size; ++ struct amdgpu_cu_info cu_info; + }; + + int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned size, struct amdgpu_ib *ib); +-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f); ++void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, ++ struct fence *f); + int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_ib *ib, struct fence *last_vm_update, +- struct fence **f); ++ struct amdgpu_job *job, struct fence **f); + int amdgpu_ib_pool_init(struct amdgpu_device *adev); + void amdgpu_ib_pool_fini(struct amdgpu_device *adev); + int amdgpu_ib_ring_tests(struct amdgpu_device *adev); +@@ -1176,7 +1213,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring); + struct amdgpu_cs_chunk { + uint32_t chunk_id; + uint32_t length_dw; +- uint32_t *kdata; ++ void *kdata; + }; + + struct amdgpu_cs_parser { +@@ -1207,13 +1244,25 @@ struct amdgpu_cs_parser { + struct amdgpu_job { + struct amd_sched_job base; + struct amdgpu_device *adev; ++ struct amdgpu_vm *vm; + struct amdgpu_ring *ring; + struct amdgpu_sync sync; + struct amdgpu_ib *ibs; + struct fence *fence; /* the hw fence */ + uint32_t num_ibs; + void *owner; +- struct amdgpu_user_fence uf; ++ uint64_t ctx; ++ unsigned vm_id; ++ uint64_t vm_pd_addr; ++ uint32_t gds_base, gds_size; ++ uint32_t gws_base, gws_size; ++ uint32_t oa_base, oa_size; ++ ++ /* user fence handling */ ++ struct amdgpu_bo *uf_bo; ++ uint32_t uf_offset; ++ uint64_t uf_sequence; ++ + }; + #define to_amdgpu_job(sched_job) \ + container_of((sched_job), struct amdgpu_job, base) +@@ -1605,6 +1654,8 @@ struct amdgpu_uvd { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; ++ unsigned fw_version; ++ void *saved_bo; + unsigned max_handles; + atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; + struct drm_file 
*filp[AMDGPU_MAX_UVD_HANDLES]; +@@ -1658,7 +1709,7 @@ struct amdgpu_sdma { + struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; + struct amdgpu_irq_src trap_irq; + struct amdgpu_irq_src illegal_inst_irq; +- int num_instances; ++ int num_instances; + }; + + /* +@@ -1704,12 +1755,12 @@ static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} + * Debugfs + */ + struct amdgpu_debugfs { +- struct drm_info_list *files; ++ const struct drm_info_list *files; + unsigned num_files; + }; + + int amdgpu_debugfs_add_files(struct amdgpu_device *adev, +- struct drm_info_list *files, ++ const struct drm_info_list *files, + unsigned nfiles); + int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); + +@@ -1751,13 +1802,6 @@ struct amdgpu_allowed_register_entry { + bool grbm_indexed; + }; + +-struct amdgpu_cu_info { +- uint32_t number; /* total active CU number */ +- uint32_t ao_cu_mask; +- uint32_t bitmap[4][4]; +-}; +- +- + /* + * ASIC specific functions. + */ +@@ -1775,7 +1819,6 @@ struct amdgpu_asic_funcs { + u32 (*get_xclk)(struct amdgpu_device *adev); + /* get the gpu clock counter */ + uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); +- int (*get_cu_info)(struct amdgpu_device *adev, struct amdgpu_cu_info *info); + /* MM block clocks */ + int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); + int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); +@@ -1868,15 +1911,8 @@ struct amdgpu_atcs { + /* + * CGS + */ +-void *amdgpu_cgs_create_device(struct amdgpu_device *adev); +-void amdgpu_cgs_destroy_device(void *cgs_device); +- +- +-/* +- * CGS +- */ +-void *amdgpu_cgs_create_device(struct amdgpu_device *adev); +-void amdgpu_cgs_destroy_device(void *cgs_device); ++struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); ++void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); + + + /* GPU virtualization */ +@@ -1919,13 +1955,13 @@ struct amdgpu_device { + bool shutdown; + bool 
need_dma32; + bool accel_working; +- struct work_struct reset_work; ++ struct work_struct reset_work; + struct notifier_block acpi_nb; + struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; + struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; +- unsigned debugfs_count; ++ unsigned debugfs_count; + #if defined(CONFIG_DEBUG_FS) +- struct dentry *debugfs_regs; ++ struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; + #endif + struct amdgpu_atif atif; + struct amdgpu_atcs atcs; +@@ -2050,6 +2086,7 @@ struct amdgpu_device { + + /* tracking pinned memory */ + u64 vram_pin_size; ++ u64 invisible_pin_size; + u64 gart_pin_size; + + /* amdkfd interface */ +@@ -2076,7 +2113,6 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); + u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); + void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); + bool amdgpu_device_has_dal_support(struct amdgpu_device *adev); +- + /* + * Registers read & write functions. 
+ */ +@@ -2178,7 +2214,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) + #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) + #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) + #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) +-#define amdgpu_asic_get_cu_info(adev, info) (adev)->asic_funcs->get_cu_info((adev), (info)) + #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) + #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) + #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) +@@ -2190,7 +2225,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) + #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) + #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) + #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) +-#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) ++#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c)) + #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) + #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) + #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) +@@ -2213,7 +2248,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) + #define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h)) + #define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev)) + #define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev)) +-#define amdgpu_display_page_flip(adev, crtc, 
base) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base)) ++#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async)) + #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) + #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) + #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) +@@ -2306,6 +2341,12 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) + #define amdgpu_dpm_force_clock_level(adev, type, level) \ + (adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level) + ++#define amdgpu_dpm_get_sclk_od(adev) \ ++ (adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle) ++ ++#define amdgpu_dpm_set_sclk_od(adev, value) \ ++ (adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value) ++ + #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \ + (adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output)) + +@@ -2355,7 +2396,7 @@ static inline void amdgpu_unregister_atpx_handler(void) {} + * KMS + */ + extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; +-extern int amdgpu_max_kms_ioctl; ++extern const int amdgpu_max_kms_ioctl; + + int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); + int amdgpu_driver_unload_kms(struct drm_device *dev); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +index 84b0ce3..6830ed4 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +@@ -699,6 +699,36 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) + return ret; + } + ++union gfx_info { ++ 
ATOM_GFX_INFO_V2_1 info; ++}; ++ ++int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev) ++{ ++ struct amdgpu_mode_info *mode_info = &adev->mode_info; ++ int index = GetIndexIntoMasterTable(DATA, GFX_Info); ++ uint8_t frev, crev; ++ uint16_t data_offset; ++ int ret = -EINVAL; ++ ++ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, ++ &frev, &crev, &data_offset)) { ++ union gfx_info *gfx_info = (union gfx_info *) ++ (mode_info->atom_context->bios + data_offset); ++ ++ adev->gfx.config.max_shader_engines = gfx_info->info.max_shader_engines; ++ adev->gfx.config.max_tile_pipes = gfx_info->info.max_tile_pipes; ++ adev->gfx.config.max_cu_per_sh = gfx_info->info.max_cu_per_sh; ++ adev->gfx.config.max_sh_per_se = gfx_info->info.max_sh_per_se; ++ adev->gfx.config.max_backends_per_se = gfx_info->info.max_backends_per_se; ++ adev->gfx.config.max_texture_channel_caches = ++ gfx_info->info.max_texture_channel_caches; ++ ++ ret = 0; ++ } ++ return ret; ++} ++ + union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +index 9e14420..8c2e696 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +@@ -144,6 +144,8 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * + + int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev); + ++int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev); ++ + bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev, + struct amdgpu_atom_ss *ss, + int id, u32 clock); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +index 7a4b101..3283763 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +@@ -42,7 +42,7 @@ struct amdgpu_cgs_device { + struct amdgpu_device *adev = \ 
+ ((struct amdgpu_cgs_device *)cgs_device)->adev + +-static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type, ++static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, + uint64_t *mc_start, uint64_t *mc_size, + uint64_t *mem_size) + { +@@ -73,7 +73,7 @@ static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type, + return 0; + } + +-static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, ++static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, + uint64_t size, + uint64_t min_offset, uint64_t max_offset, + cgs_handle_t *kmem_handle, uint64_t *mcaddr) +@@ -102,7 +102,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, + return ret; + } + +-static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle) ++static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) + { + struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; + +@@ -118,7 +118,7 @@ static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle) + return 0; + } + +-static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, ++static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, + enum cgs_gpu_mem_type type, + uint64_t size, uint64_t align, + uint64_t min_offset, uint64_t max_offset, +@@ -208,7 +208,7 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, + return ret; + } + +-static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) ++static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) + { + struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; + +@@ -225,7 +225,7 @@ static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) + return 0; + } + +-static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle, ++static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, + uint64_t *mcaddr) + { + int r; +@@ -246,7 
+246,7 @@ static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle, + return r; + } + +-static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) ++static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) + { + int r; + struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; +@@ -258,7 +258,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) + return r; + } + +-static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle, ++static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, + void **map) + { + int r; +@@ -271,7 +271,7 @@ static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle, + return r; + } + +-static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) ++static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) + { + int r; + struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; +@@ -283,20 +283,20 @@ static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) + return r; + } + +-static uint32_t amdgpu_cgs_read_register(void *cgs_device, unsigned offset) ++static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) + { + CGS_FUNC_ADEV; + return RREG32(offset); + } + +-static void amdgpu_cgs_write_register(void *cgs_device, unsigned offset, ++static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset, + uint32_t value) + { + CGS_FUNC_ADEV; + WREG32(offset, value); + } + +-static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device, ++static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, + enum cgs_ind_reg space, + unsigned index) + { +@@ -320,7 +320,7 @@ static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device, + return 0; + } + +-static void amdgpu_cgs_write_ind_register(void *cgs_device, ++static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, + 
enum cgs_ind_reg space, + unsigned index, uint32_t value) + { +@@ -343,7 +343,7 @@ static void amdgpu_cgs_write_ind_register(void *cgs_device, + WARN(1, "Invalid indirect register space"); + } + +-static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr) ++static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) + { + CGS_FUNC_ADEV; + uint8_t val; +@@ -353,7 +353,7 @@ static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr) + return val; + } + +-static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr) ++static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) + { + CGS_FUNC_ADEV; + uint16_t val; +@@ -363,7 +363,7 @@ static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr) + return val; + } + +-static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device, ++static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device, + unsigned addr) + { + CGS_FUNC_ADEV; +@@ -374,7 +374,7 @@ static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device, + return val; + } + +-static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr, ++static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr, + uint8_t value) + { + CGS_FUNC_ADEV; +@@ -382,7 +382,7 @@ static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr, + WARN(ret, "pci_write_config_byte error"); + } + +-static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr, ++static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr, + uint16_t value) + { + CGS_FUNC_ADEV; +@@ -390,7 +390,7 @@ static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr, + WARN(ret, "pci_write_config_word error"); + } + +-static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr, ++static void 
amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr, + uint32_t value) + { + CGS_FUNC_ADEV; +@@ -399,7 +399,7 @@ static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr, + } + + +-static int amdgpu_cgs_get_pci_resource(void *cgs_device, ++static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, + enum cgs_resource_type resource_type, + uint64_t size, + uint64_t offset, +@@ -433,7 +433,7 @@ static int amdgpu_cgs_get_pci_resource(void *cgs_device, + } + } + +-static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device, ++static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, + unsigned table, uint16_t *size, + uint8_t *frev, uint8_t *crev) + { +@@ -449,7 +449,7 @@ static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device, + return NULL; + } + +-static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table, ++static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, + uint8_t *frev, uint8_t *crev) + { + CGS_FUNC_ADEV; +@@ -462,7 +462,7 @@ static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table, + return -EINVAL; + } + +-static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table, ++static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, + void *args) + { + CGS_FUNC_ADEV; +@@ -471,33 +471,33 @@ static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table, + adev->mode_info.atom_context, table, args); + } + +-static int amdgpu_cgs_create_pm_request(void *cgs_device, cgs_handle_t *request) ++static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request) + { + /* TODO */ + return 0; + } + +-static int amdgpu_cgs_destroy_pm_request(void *cgs_device, cgs_handle_t request) ++static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request) + { + /* TODO */ + return 0; + } + 
+-static int amdgpu_cgs_set_pm_request(void *cgs_device, cgs_handle_t request, ++static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request, + int active) + { + /* TODO */ + return 0; + } + +-static int amdgpu_cgs_pm_request_clock(void *cgs_device, cgs_handle_t request, ++static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request, + enum cgs_clock clock, unsigned freq) + { + /* TODO */ + return 0; + } + +-static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request, ++static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request, + enum cgs_engine engine, int powered) + { + /* TODO */ +@@ -506,7 +506,7 @@ static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request, + + + +-static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device, ++static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device, + enum cgs_clock clock, + struct cgs_clock_limits *limits) + { +@@ -514,7 +514,7 @@ static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device, + return 0; + } + +-static int amdgpu_cgs_set_camera_voltages(void *cgs_device, uint32_t mask, ++static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask, + const uint32_t *voltages) + { + DRM_ERROR("not implemented"); +@@ -612,7 +612,7 @@ static int amdgpu_cgs_irq_put(void *cgs_device, unsigned src_id, unsigned type) + return amdgpu_irq_put(adev, adev->irq.sources[src_id], type); + } + +-int amdgpu_cgs_set_clockgating_state(void *cgs_device, ++int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state) + { +@@ -633,7 +633,7 @@ int amdgpu_cgs_set_clockgating_state(void *cgs_device, + return r; + } + +-int amdgpu_cgs_set_powergating_state(void *cgs_device, ++int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, + enum amd_ip_block_type block_type, + enum 
amd_powergating_state state) + { +@@ -655,7 +655,7 @@ int amdgpu_cgs_set_powergating_state(void *cgs_device, + } + + +-static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) ++static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) + { + CGS_FUNC_ADEV; + enum AMDGPU_UCODE_ID result = AMDGPU_UCODE_ID_MAXIMUM; +@@ -681,9 +681,10 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) + result = AMDGPU_UCODE_ID_CP_MEC1; + break; + case CGS_UCODE_ID_CP_MEC_JT2: +- if (adev->asic_type == CHIP_TONGA) ++ if (adev->asic_type == CHIP_TONGA || adev->asic_type == CHIP_POLARIS11 ++ || adev->asic_type == CHIP_POLARIS10) + result = AMDGPU_UCODE_ID_CP_MEC2; +- else if (adev->asic_type == CHIP_CARRIZO) ++ else + result = AMDGPU_UCODE_ID_CP_MEC1; + break; + case CGS_UCODE_ID_RLC_G: +@@ -695,13 +696,13 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) + return result; + } + +-static int amdgpu_cgs_get_firmware_info(void *cgs_device, ++static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, + enum cgs_ucode_id type, + struct cgs_firmware_info *info) + { + CGS_FUNC_ADEV; + +- if (CGS_UCODE_ID_SMU != type) { ++ if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) { + uint64_t gpu_addr; + uint32_t data_size; + const struct gfx_firmware_header_v1_0 *header; +@@ -734,30 +735,44 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device, + const uint8_t *src; + const struct smc_firmware_header_v1_0 *hdr; + +- switch (adev->asic_type) { +- case CHIP_TONGA: +- strcpy(fw_name, "amdgpu/tonga_smc.bin"); +- break; +- case CHIP_FIJI: +- strcpy(fw_name, "amdgpu/fiji_smc.bin"); +- break; +- default: +- DRM_ERROR("SMC firmware not supported\n"); +- return -EINVAL; +- } ++ if (!adev->pm.fw) { ++ switch (adev->asic_type) { ++ case CHIP_TONGA: ++ strcpy(fw_name, "amdgpu/tonga_smc.bin"); ++ break; ++ case CHIP_FIJI: ++ strcpy(fw_name, "amdgpu/fiji_smc.bin"); ++ break; ++ case CHIP_POLARIS11: ++ if (type 
== CGS_UCODE_ID_SMU) ++ strcpy(fw_name, "amdgpu/polaris11_smc.bin"); ++ else if (type == CGS_UCODE_ID_SMU_SK) ++ strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); ++ break; ++ case CHIP_POLARIS10: ++ if (type == CGS_UCODE_ID_SMU) ++ strcpy(fw_name, "amdgpu/polaris10_smc.bin"); ++ else if (type == CGS_UCODE_ID_SMU_SK) ++ strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); ++ break; ++ default: ++ DRM_ERROR("SMC firmware not supported\n"); ++ return -EINVAL; ++ } + +- err = request_firmware(&adev->pm.fw, fw_name, adev->dev); +- if (err) { +- DRM_ERROR("Failed to request firmware\n"); +- return err; +- } ++ err = request_firmware(&adev->pm.fw, fw_name, adev->dev); ++ if (err) { ++ DRM_ERROR("Failed to request firmware\n"); ++ return err; ++ } + +- err = amdgpu_ucode_validate(adev->pm.fw); +- if (err) { +- DRM_ERROR("Failed to load firmware \"%s\"", fw_name); +- release_firmware(adev->pm.fw); +- adev->pm.fw = NULL; +- return err; ++ err = amdgpu_ucode_validate(adev->pm.fw); ++ if (err) { ++ DRM_ERROR("Failed to load firmware \"%s\"", fw_name); ++ release_firmware(adev->pm.fw); ++ adev->pm.fw = NULL; ++ return err; ++ } + } + + hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data; +@@ -774,7 +789,7 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device, + return 0; + } + +-static int amdgpu_cgs_query_system_info(void *cgs_device, ++static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, + struct cgs_system_info *sys_info) + { + CGS_FUNC_ADEV; +@@ -801,6 +816,9 @@ static int amdgpu_cgs_query_system_info(void *cgs_device, + case CGS_SYSTEM_INFO_PG_FLAGS: + sys_info->value = adev->pg_flags; + break; ++ case CGS_SYSTEM_INFO_GFX_CU_INFO: ++ sys_info->value = adev->gfx.cu_info.number; ++ break; + default: + return -ENODEV; + } +@@ -808,7 +826,7 @@ static int amdgpu_cgs_query_system_info(void *cgs_device, + return 0; + } + +-static int amdgpu_cgs_get_active_displays_info(void *cgs_device, ++static int amdgpu_cgs_get_active_displays_info(struct 
cgs_device *cgs_device, + struct cgs_display_info *info) + { + CGS_FUNC_ADEV; +@@ -816,10 +834,13 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, + struct drm_device *ddev = adev->ddev; + struct drm_crtc *crtc; + uint32_t line_time_us, vblank_lines; ++ struct cgs_mode_info *mode_info; + + if (info == NULL) + return -EINVAL; + ++ mode_info = info->mode_info; ++ + if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { +@@ -828,7 +849,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, + info->active_display_mask |= (1 << amdgpu_crtc->crtc_id); + info->display_count++; + } +- if (info->mode_info != NULL && ++ if (mode_info != NULL && + crtc->enabled && amdgpu_crtc->enabled && + amdgpu_crtc->hw_mode.clock) { + line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) / +@@ -836,10 +857,10 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, + vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end - + amdgpu_crtc->hw_mode.crtc_vdisplay + + (amdgpu_crtc->v_border * 2); +- info->mode_info->vblank_time_us = vblank_lines * line_time_us; +- info->mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); +- info->mode_info->ref_clock = adev->clock.spll.reference_freq; +- info->mode_info++; ++ mode_info->vblank_time_us = vblank_lines * line_time_us; ++ mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); ++ mode_info->ref_clock = adev->clock.spll.reference_freq; ++ mode_info = NULL; + } + } + } +@@ -847,6 +868,16 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, + return 0; + } + ++ ++static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) ++{ ++ CGS_FUNC_ADEV; ++ ++ adev->pm.dpm_enabled = enabled; ++ ++ return 0; ++} ++ + /** \brief evaluate acpi namespace object, handle or pathname must be valid + * \param cgs_device + * \param info input/output arguments for the 
control method +@@ -854,7 +885,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, + */ + + #if defined(CONFIG_ACPI) +-static int amdgpu_cgs_acpi_eval_object(void *cgs_device, ++static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, + struct cgs_acpi_method_info *info) + { + CGS_FUNC_ADEV; +@@ -1017,14 +1048,14 @@ error: + return result; + } + #else +-static int amdgpu_cgs_acpi_eval_object(void *cgs_device, ++static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, + struct cgs_acpi_method_info *info) + { + return -EIO; + } + #endif + +-int amdgpu_cgs_call_acpi_method(void *cgs_device, ++int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, + uint32_t acpi_method, + uint32_t acpi_function, + void *pinput, void *poutput, +@@ -1097,6 +1128,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { + amdgpu_cgs_set_powergating_state, + amdgpu_cgs_set_clockgating_state, + amdgpu_cgs_get_active_displays_info, ++ amdgpu_cgs_notify_dpm_enabled, + amdgpu_cgs_call_acpi_method, + amdgpu_cgs_query_system_info, + }; +@@ -1107,7 +1139,7 @@ static const struct cgs_os_ops amdgpu_cgs_os_ops = { + amdgpu_cgs_irq_put + }; + +-void *amdgpu_cgs_create_device(struct amdgpu_device *adev) ++struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) + { + struct amdgpu_cgs_device *cgs_device = + kmalloc(sizeof(*cgs_device), GFP_KERNEL); +@@ -1121,10 +1153,10 @@ void *amdgpu_cgs_create_device(struct amdgpu_device *adev) + cgs_device->base.os_ops = &amdgpu_cgs_os_ops; + cgs_device->adev = adev; + +- return cgs_device; ++ return (struct cgs_device *)cgs_device; + } + +-void amdgpu_cgs_destroy_device(void *cgs_device) ++void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device) + { + kfree(cgs_device); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index d7e0b0b..2bbeeb0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -24,7 
+24,6 @@ + * Authors: + * Jerome Glisse <glisse@freedesktop.org> + */ +-#include <linux/list_sort.h> + #include <linux/pagemap.h> + #include <drm/drmP.h> + #include <drm/amdgpu_drm.h> +@@ -88,44 +87,42 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, + } + + static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, +- struct amdgpu_user_fence *uf, +- struct drm_amdgpu_cs_chunk_fence *fence_data) ++ struct drm_amdgpu_cs_chunk_fence *data, ++ uint32_t *offset) + { + struct drm_gem_object *gobj; +- uint32_t handle; + +- handle = fence_data->handle; + gobj = drm_gem_object_lookup(p->adev->ddev, p->filp, +- fence_data->handle); ++ data->handle); + if (gobj == NULL) + return -EINVAL; + +- uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); +- uf->offset = fence_data->offset; +- +- if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { +- drm_gem_object_unreference_unlocked(gobj); +- return -EINVAL; +- } +- +- p->uf_entry.robj = amdgpu_bo_ref(uf->bo); ++ p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + p->uf_entry.priority = 0; + p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; + p->uf_entry.tv.shared = true; + p->uf_entry.user_pages = NULL; ++ *offset = data->offset; + + drm_gem_object_unreference_unlocked(gobj); ++ ++ if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { ++ amdgpu_bo_unref(&p->uf_entry.robj); ++ return -EINVAL; ++ } ++ + return 0; + } + + int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) + { + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; ++ struct amdgpu_vm *vm = &fpriv->vm; + union drm_amdgpu_cs *cs = data; + uint64_t *chunk_array_user; + uint64_t *chunk_array; +- struct amdgpu_user_fence uf = {}; + unsigned size, num_ibs = 0; ++ uint32_t uf_offset = 0; + int i; + int ret; + +@@ -200,7 +197,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) + goto free_partial_kdata; + } + +- ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); ++ ret = amdgpu_cs_user_fence_chunk(p, 
p->chunks[i].kdata, ++ &uf_offset); + if (ret) + goto free_partial_kdata; + +@@ -215,11 +213,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) + } + } + +- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job); ++ ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm); + if (ret) + goto free_all_kdata; + +- p->job->uf = uf; ++ if (p->uf_entry.robj) { ++ p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj); ++ p->job->uf_offset = uf_offset; ++ } + + kfree(chunk_array); + return 0; +@@ -377,7 +378,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, + INIT_LIST_HEAD(&duplicates); + amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); + +- if (p->job->uf.bo) ++ if (p->uf_entry.robj) + list_add(&p->uf_entry.tv.head, &p->validated); + + if (need_mmap_lock) +@@ -473,6 +474,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, + goto error_validate; + + if (p->bo_list) { ++ struct amdgpu_bo *gds = p->bo_list->gds_obj; ++ struct amdgpu_bo *gws = p->bo_list->gws_obj; ++ struct amdgpu_bo *oa = p->bo_list->oa_obj; + struct amdgpu_vm *vm = &fpriv->vm; + unsigned i; + +@@ -481,6 +485,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, + + p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); + } ++ ++ if (gds) { ++ p->job->gds_base = amdgpu_bo_gpu_offset(gds); ++ p->job->gds_size = amdgpu_bo_size(gds); ++ } ++ if (gws) { ++ p->job->gws_base = amdgpu_bo_gpu_offset(gws); ++ p->job->gws_size = amdgpu_bo_size(gws); ++ } ++ if (oa) { ++ p->job->oa_base = amdgpu_bo_gpu_offset(oa); ++ p->job->oa_size = amdgpu_bo_size(oa); ++ } + } + + error_validate: +@@ -527,16 +544,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) + return 0; + } + +-static int cmp_size_smaller_first(void *priv, struct list_head *a, +- struct list_head *b) +-{ +- struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head); +- struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head); +- +- /* 
Sort A before B if A is smaller. */ +- return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; +-} +- + /** + * cs_parser_fini() - clean parser states + * @parser: parser structure holding parsing context. +@@ -553,18 +560,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo + if (!error) { + amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); + +- /* Sort the buffer list from the smallest to largest buffer, +- * which affects the order of buffers in the LRU list. +- * This assures that the smallest buffers are added first +- * to the LRU list, so they are likely to be later evicted +- * first, instead of large buffers whose eviction is more +- * expensive. +- * +- * This slightly lowers the number of bytes moved by TTM +- * per frame under memory pressure. +- */ +- list_sort(NULL, &parser->validated, cmp_size_smaller_first); +- + ttm_eu_fence_buffer_objects(&parser->ticket, + &parser->validated, + parser->fence); +@@ -763,41 +758,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, + + ib->length_dw = chunk_ib->ib_bytes / 4; + ib->flags = chunk_ib->flags; +- ib->ctx = parser->ctx; + j++; + } + +- /* add GDS resources to first IB */ +- if (parser->bo_list) { +- struct amdgpu_bo *gds = parser->bo_list->gds_obj; +- struct amdgpu_bo *gws = parser->bo_list->gws_obj; +- struct amdgpu_bo *oa = parser->bo_list->oa_obj; +- struct amdgpu_ib *ib = &parser->job->ibs[0]; +- +- if (gds) { +- ib->gds_base = amdgpu_bo_gpu_offset(gds); +- ib->gds_size = amdgpu_bo_size(gds); +- } +- if (gws) { +- ib->gws_base = amdgpu_bo_gpu_offset(gws); +- ib->gws_size = amdgpu_bo_size(gws); +- } +- if (oa) { +- ib->oa_base = amdgpu_bo_gpu_offset(oa); +- ib->oa_size = amdgpu_bo_size(oa); +- } +- } +- /* wrap the last IB with user fence */ +- if (parser->job->uf.bo) { +- struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; +- +- /* UVD & VCE fw doesn't support user fences */ +- if (parser->job->ring->type == 
AMDGPU_RING_TYPE_UVD || +- parser->job->ring->type == AMDGPU_RING_TYPE_VCE) +- return -EINVAL; +- +- ib->user = &parser->job->uf; +- } ++ /* UVD & VCE fw doesn't support user fences */ ++ if (parser->job->uf_bo && ( ++ parser->job->ring->type == AMDGPU_RING_TYPE_UVD || ++ parser->job->ring->type == AMDGPU_RING_TYPE_VCE)) ++ return -EINVAL; + + return 0; + } +@@ -862,6 +830,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) + { + struct amdgpu_ring *ring = p->job->ring; ++ struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + struct fence *fence; + struct amdgpu_job *job; + int r; +@@ -870,19 +839,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, + p->job = NULL; + + r = amd_sched_job_init(&job->base, &ring->sched, +- &p->ctx->rings[ring->idx].entity, +- amdgpu_job_timeout_func, +- amdgpu_job_free_func, +- p->filp, &fence); ++ entity, amdgpu_job_timeout_func, ++ amdgpu_job_free_func, ++ p->filp, &fence); + if (r) { + amdgpu_job_free(job); + return r; + } + + job->owner = p->filp; ++ job->ctx = entity->fence_context; + p->fence = fence_get(fence); + cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence); +- job->ibs[job->num_ibs - 1].sequence = cs->out.handle; ++ job->uf_sequence = cs->out.handle; + + trace_amdgpu_cs_ioctl(job); + amd_sched_entity_push_job(&job->base); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 6e38497..04d5a38 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -59,15 +59,11 @@ static const char *amdgpu_asic_name[] = { + "FIJI", + "CARRIZO", + "STONEY", ++ "POLARIS10", ++ "POLARIS11", + "LAST", + }; + +-#if defined(CONFIG_VGA_SWITCHEROO) +-bool amdgpu_has_atpx_dgpu_power_cntl(void); +-#else +-static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } +-#endif +- + bool amdgpu_device_is_px(struct drm_device *dev) + { + struct amdgpu_device 
*adev = dev->dev_private; +@@ -352,7 +348,7 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) + adev->doorbell.base = pci_resource_start(adev->pdev, 2); + adev->doorbell.size = pci_resource_len(adev->pdev, 2); + +- adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), ++ adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), + AMDGPU_DOORBELL_MAX_ASSIGNMENT+1); + if (adev->doorbell.num_doorbells == 0) + return -EINVAL; +@@ -942,15 +938,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) + } + + if (amdgpu_gart_size != -1) { +- /* gtt size must be power of two and greater or equal to 32M */ ++ /* gtt size must be greater or equal to 32M */ + if (amdgpu_gart_size < 32) { + dev_warn(adev->dev, "gart size (%d) too small\n", + amdgpu_gart_size); + amdgpu_gart_size = -1; +- } else if (!amdgpu_check_pot_argument(amdgpu_gart_size)) { +- dev_warn(adev->dev, "gart size (%d) must be a power of 2\n", +- amdgpu_gart_size); +- amdgpu_gart_size = -1; + } + } + +@@ -1150,6 +1142,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS10: + case CHIP_CARRIZO: + case CHIP_STONEY: + if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) +@@ -1202,7 +1196,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) + if (r == -ENOENT) { + adev->ip_block_status[i].valid = false; + } else if (r) { +- DRM_ERROR("early_init %d failed %d\n", i, r); ++ DRM_ERROR("early_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; + } else { + adev->ip_block_status[i].valid = true; +@@ -1225,7 +1219,7 @@ static int amdgpu_init(struct amdgpu_device *adev) + continue; + r = adev->ip_blocks[i].funcs->sw_init((void *)adev); + if (r) { +- DRM_ERROR("sw_init %d failed %d\n", i, r); ++ DRM_ERROR("sw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; 
+ } + adev->ip_block_status[i].sw = true; +@@ -1258,7 +1252,7 @@ static int amdgpu_init(struct amdgpu_device *adev) + continue; + r = adev->ip_blocks[i].funcs->hw_init((void *)adev); + if (r) { +- DRM_ERROR("hw_init %d failed %d\n", i, r); ++ DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; + } + adev->ip_block_status[i].hw = true; +@@ -1278,13 +1272,13 @@ static int amdgpu_late_init(struct amdgpu_device *adev) + r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, + AMD_CG_STATE_GATE); + if (r) { +- DRM_ERROR("set_clockgating_state(gate) %d failed %d\n", i, r); ++ DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; + } + if (adev->ip_blocks[i].funcs->late_init) { + r = adev->ip_blocks[i].funcs->late_init((void *)adev); + if (r) { +- DRM_ERROR("late_init %d failed %d\n", i, r); ++ DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; + } + } +@@ -1308,13 +1302,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) + r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, + AMD_CG_STATE_UNGATE); + if (r) { +- DRM_ERROR("set_clockgating_state(ungate) %d failed %d\n", i, r); ++ DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; + } + r = adev->ip_blocks[i].funcs->hw_fini((void *)adev); + /* XXX handle errors */ + if (r) { +- DRM_DEBUG("hw_fini %d failed %d\n", i, r); ++ DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + } + adev->ip_block_status[i].hw = false; + } +@@ -1325,7 +1319,7 @@ static int amdgpu_fini(struct amdgpu_device *adev) + r = adev->ip_blocks[i].funcs->sw_fini((void *)adev); + /* XXX handle errors */ + if (r) { +- DRM_DEBUG("sw_fini %d failed %d\n", i, r); ++ DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + } + 
adev->ip_block_status[i].sw = false; + adev->ip_block_status[i].valid = false; +@@ -1338,20 +1332,29 @@ static int amdgpu_suspend(struct amdgpu_device *adev) + { + int i, r; + ++ /* ungate SMC block first */ ++ r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, ++ AMD_CG_STATE_UNGATE); ++ if (r) { ++ DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n",r); ++ } ++ + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (!adev->ip_block_status[i].valid) + continue; + /* ungate blocks so that suspend can properly shut them down */ +- r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, +- AMD_CG_STATE_UNGATE); +- if (r) { +- DRM_ERROR("set_clockgating_state(ungate) %d failed %d\n", i, r); ++ if (i != AMD_IP_BLOCK_TYPE_SMC) { ++ r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, ++ AMD_CG_STATE_UNGATE); ++ if (r) { ++ DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); ++ } + } + /* XXX handle errors */ + r = adev->ip_blocks[i].funcs->suspend(adev); + /* XXX handle errors */ + if (r) { +- DRM_ERROR("suspend %d failed %d\n", i, r); ++ DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + } + } + +@@ -1367,7 +1370,7 @@ static int amdgpu_resume(struct amdgpu_device *adev) + continue; + r = adev->ip_blocks[i].funcs->resume(adev); + if (r) { +- DRM_ERROR("resume %d failed %d\n", i, r); ++ DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + return r; + } + } +@@ -1391,9 +1394,11 @@ bool amdgpu_device_has_dal_support(struct amdgpu_device *adev) + case CHIP_HAWAII: + return amdgpu_dal != 0; + #endif +-#if defined(CONFIG_DRM_AMD_DAL) && defined(CONFIG_DRM_AMD_DAL_DCE11_0) ++#if defined(CONFIG_DRM_AMD_DAL) && (defined(CONFIG_DRM_AMD_DAL_DCE11_0) || defined(CONFIG_DRM_AMD_DAL_DCE11_2)) + case CHIP_CARRIZO: + case CHIP_STONEY: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS10: + return amdgpu_dal != 0; + #endif + #if 
defined(CONFIG_DRM_AMD_DAL) && defined(CONFIG_DRM_AMD_DAL_DCE10_0) +@@ -1517,7 +1522,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, + + if (amdgpu_runtime_pm == 1) + runtime = true; +- if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl()) ++ if (amdgpu_device_is_px(ddev)) + runtime = true; + vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime); + if (runtime) +@@ -1812,6 +1817,9 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon) + if (r) + DRM_ERROR("amdgpu_resume failed (%d).\n", r); + ++ if (r) ++ DRM_ERROR("amdgpu_resume failed (%d).\n", r); ++ + amdgpu_fence_driver_resume(adev); + + if (resume) { +@@ -2066,7 +2074,7 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev) + * Debugfs + */ + int amdgpu_debugfs_add_files(struct amdgpu_device *adev, +- struct drm_info_list *files, ++ const struct drm_info_list *files, + unsigned nfiles) + { + unsigned i; +@@ -2178,32 +2186,246 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, + return result; + } + ++static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct amdgpu_device *adev = f->f_inode->i_private; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ uint32_t value; ++ ++ value = RREG32_PCIE(*pos >> 2); ++ r = put_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ result += 4; ++ buf += 4; ++ *pos += 4; ++ size -= 4; ++ } ++ ++ return result; ++} ++ ++static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct amdgpu_device *adev = f->f_inode->i_private; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ uint32_t value; ++ ++ r = get_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ WREG32_PCIE(*pos >> 2, value); ++ ++ result += 4; ++ buf += 4; 
++ *pos += 4; ++ size -= 4; ++ } ++ ++ return result; ++} ++ ++static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct amdgpu_device *adev = f->f_inode->i_private; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ uint32_t value; ++ ++ value = RREG32_DIDT(*pos >> 2); ++ r = put_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ result += 4; ++ buf += 4; ++ *pos += 4; ++ size -= 4; ++ } ++ ++ return result; ++} ++ ++static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct amdgpu_device *adev = f->f_inode->i_private; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ uint32_t value; ++ ++ r = get_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ WREG32_DIDT(*pos >> 2, value); ++ ++ result += 4; ++ buf += 4; ++ *pos += 4; ++ size -= 4; ++ } ++ ++ return result; ++} ++ ++static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct amdgpu_device *adev = f->f_inode->i_private; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ uint32_t value; ++ ++ value = RREG32_SMC(*pos >> 2); ++ r = put_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ result += 4; ++ buf += 4; ++ *pos += 4; ++ size -= 4; ++ } ++ ++ return result; ++} ++ ++static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct amdgpu_device *adev = f->f_inode->i_private; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ uint32_t value; ++ ++ r = get_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ WREG32_SMC(*pos >> 2, value); ++ ++ result += 4; ++ buf += 4; ++ *pos += 4; ++ 
size -= 4; ++ } ++ ++ return result; ++} ++ + static const struct file_operations amdgpu_debugfs_regs_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_read, + .write = amdgpu_debugfs_regs_write, + .llseek = default_llseek + }; ++static const struct file_operations amdgpu_debugfs_regs_didt_fops = { ++ .owner = THIS_MODULE, ++ .read = amdgpu_debugfs_regs_didt_read, ++ .write = amdgpu_debugfs_regs_didt_write, ++ .llseek = default_llseek ++}; ++static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { ++ .owner = THIS_MODULE, ++ .read = amdgpu_debugfs_regs_pcie_read, ++ .write = amdgpu_debugfs_regs_pcie_write, ++ .llseek = default_llseek ++}; ++static const struct file_operations amdgpu_debugfs_regs_smc_fops = { ++ .owner = THIS_MODULE, ++ .read = amdgpu_debugfs_regs_smc_read, ++ .write = amdgpu_debugfs_regs_smc_write, ++ .llseek = default_llseek ++}; ++ ++static const struct file_operations *debugfs_regs[] = { ++ &amdgpu_debugfs_regs_fops, ++ &amdgpu_debugfs_regs_didt_fops, ++ &amdgpu_debugfs_regs_pcie_fops, ++ &amdgpu_debugfs_regs_smc_fops, ++}; ++ ++static const char *debugfs_regs_names[] = { ++ "amdgpu_regs", ++ "amdgpu_regs_didt", ++ "amdgpu_regs_pcie", ++ "amdgpu_regs_smc", ++}; + + static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) + { + struct drm_minor *minor = adev->ddev->primary; + struct dentry *ent, *root = minor->debugfs_root; ++ unsigned i, j; ++ ++ for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ++ ent = debugfs_create_file(debugfs_regs_names[i], ++ S_IFREG | S_IRUGO, root, ++ adev, debugfs_regs[i]); ++ if (IS_ERR(ent)) { ++ for (j = 0; j < i; j++) { /* undo earlier entries */ ++ debugfs_remove(adev->debugfs_regs[j]); ++ adev->debugfs_regs[j] = NULL; ++ } ++ return PTR_ERR(ent); ++ } + +- ent = debugfs_create_file("amdgpu_regs", S_IFREG | S_IRUGO, root, +- adev, &amdgpu_debugfs_regs_fops); +- if (IS_ERR(ent)) +- return PTR_ERR(ent); +- i_size_write(ent->d_inode, adev->rmmio_size); +- adev->debugfs_regs = ent; ++ if (!i) ++ 
i_size_write(ent->d_inode, adev->rmmio_size); ++ adev->debugfs_regs[i] = ent; ++ } + + return 0; + } + + static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) + { +- debugfs_remove(adev->debugfs_regs); +- adev->debugfs_regs = NULL; ++ unsigned i; ++ ++ for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ++ if (adev->debugfs_regs[i]) { ++ debugfs_remove(adev->debugfs_regs[i]); ++ adev->debugfs_regs[i] = NULL; ++ } ++ } + } + + int amdgpu_debugfs_init(struct drm_minor *minor) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 604ed4d..f949be1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -50,9 +50,11 @@ + * KMS wrapper. + * - 3.0.0 - initial driver + * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP) ++ * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same ++ * at the end of IBs. + */ + #define KMS_DRIVER_MAJOR 3 +-#define KMS_DRIVER_MINOR 1 ++#define KMS_DRIVER_MINOR 2 + #define KMS_DRIVER_PATCHLEVEL 0 + + int amdgpu_vram_limit = 0; +@@ -170,7 +172,7 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); + MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); + module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); + +-static struct pci_device_id pciidlist[] = { ++static const struct pci_device_id pciidlist[] = { + #ifdef CONFIG_DRM_AMDGPU_CIK + /* Kaveri */ + {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, +@@ -281,6 +283,28 @@ static struct pci_device_id pciidlist[] = { + {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU}, + /* stoney */ + {0x1002, 0x98E4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_STONEY|AMD_IS_APU}, ++ /* Polaris11 */ ++ {0x1002, 0x67E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67E8, 
PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ {0x1002, 0x67E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, ++ /* Polaris10 */ ++ {0x1002, 0x67C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, ++ {0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + + {0, 0, 0} + }; +@@ -322,6 +346,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + return -ENODEV; + } + ++ /* ++ * Initialize amdkfd before starting radeon. 
If it was not loaded yet, ++ * defer radeon probing ++ */ ++ ret = amdgpu_amdkfd_init(); ++ if (ret == -EPROBE_DEFER) ++ return ret; ++ + /* Get rid of things like offb */ + ret = amdgpu_kick_out_firmware_fb(pdev); + if (ret) +@@ -549,9 +581,12 @@ static struct pci_driver amdgpu_kms_pci_driver = { + .driver.pm = &amdgpu_pm_ops, + }; + ++ ++ + static int __init amdgpu_init(void) + { + amdgpu_sync_init(); ++ amdgpu_fence_slab_init(); + #ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force()) { + DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); +@@ -564,9 +599,6 @@ static int __init amdgpu_init(void) + driver->driver_features |= DRIVER_MODESET; + driver->num_ioctls = amdgpu_max_kms_ioctl; + amdgpu_register_atpx_handler(); +- +- amdgpu_amdkfd_init(); +- + /* let modprobe override vga console setting */ + return drm_pci_init(driver, pdriver); + } +@@ -577,6 +609,7 @@ static void __exit amdgpu_exit(void) + drm_pci_exit(driver, pdriver); + amdgpu_unregister_atpx_handler(); + amdgpu_sync_fini(); ++ amdgpu_fence_slab_fini(); + } + + module_init(amdgpu_init); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index 4303b44..2b89db4 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -55,8 +55,21 @@ struct amdgpu_fence { + }; + + static struct kmem_cache *amdgpu_fence_slab; +-static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); + ++int amdgpu_fence_slab_init(void) ++{ ++ amdgpu_fence_slab = kmem_cache_create( ++ "amdgpu_fence", sizeof(struct amdgpu_fence), 0, ++ SLAB_HWCACHE_ALIGN, NULL); ++ if (!amdgpu_fence_slab) ++ return -ENOMEM; ++ return 0; ++} ++ ++void amdgpu_fence_slab_fini(void) ++{ ++ kmem_cache_destroy(amdgpu_fence_slab); ++} + /* + * Cast helper + */ +@@ -121,7 +134,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) + { + struct amdgpu_device *adev = ring->adev; + struct amdgpu_fence *fence; +- struct fence **ptr; ++ struct fence 
*old, **ptr; + uint32_t seq; + + fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); +@@ -141,7 +154,11 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) + /* This function can't be called concurrently anyway, otherwise + * emitting the fence would mess up the hardware ring buffer. + */ +- BUG_ON(rcu_dereference_protected(*ptr, 1)); ++ old = rcu_dereference_protected(*ptr, 1); ++ if (old && !fence_is_signaled(old)) { ++ DRM_INFO("rcu slot is busy\n"); ++ fence_wait(old, false); ++ } + + rcu_assign_pointer(*ptr, fence_get(&fence->base)); + +@@ -348,9 +365,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, + setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, + (unsigned long)ring); + +- ring->fence_drv.num_fences_mask = num_hw_submission - 1; ++ ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; + spin_lock_init(&ring->fence_drv.lock); +- ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *), ++ ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), + GFP_KERNEL); + if (!ring->fence_drv.fences) + return -ENOMEM; +@@ -392,13 +409,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, + */ + int amdgpu_fence_driver_init(struct amdgpu_device *adev) + { +- if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) { +- amdgpu_fence_slab = kmem_cache_create( +- "amdgpu_fence", sizeof(struct amdgpu_fence), 0, +- SLAB_HWCACHE_ALIGN, NULL); +- if (!amdgpu_fence_slab) +- return -ENOMEM; +- } + if (amdgpu_debugfs_fence_init(adev)) + dev_err(adev->dev, "fence debugfs file creation failed\n"); + +@@ -437,9 +447,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) + kfree(ring->fence_drv.fences); + ring->fence_drv.initialized = false; + } +- +- if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) +- kmem_cache_destroy(amdgpu_fence_slab); + } + + /** +@@ -635,7 +642,7 @@ static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) + return 0; + } + +-static struct 
drm_info_list amdgpu_debugfs_fence_list[] = { ++static const struct drm_info_list amdgpu_debugfs_fence_list[] = { + {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, + {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +index 7312d72..921bce2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +@@ -238,18 +238,17 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, + t = offset / AMDGPU_GPU_PAGE_SIZE; + p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); + for (i = 0; i < pages; i++, p++) { +- if (adev->gart.pages[p]) { +- adev->gart.pages[p] = NULL; +- adev->gart.pages_addr[p] = adev->dummy_page.addr; +- page_base = adev->gart.pages_addr[p]; +- if (!adev->gart.ptr) +- continue; ++#ifdef CONFIG_AMDGPU_GART_DEBUGFS ++ adev->gart.pages[p] = NULL; ++#endif ++ page_base = adev->dummy_page.addr; ++ if (!adev->gart.ptr) ++ continue; + +- for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { +- amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, +- t, page_base, flags); +- page_base += AMDGPU_GPU_PAGE_SIZE; +- } ++ for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { ++ amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, ++ t, page_base, flags); ++ page_base += AMDGPU_GPU_PAGE_SIZE; + } + } + mb(); +@@ -287,10 +286,11 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, + p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); + + for (i = 0; i < pages; i++, p++) { +- adev->gart.pages_addr[p] = dma_addr[i]; ++#ifdef CONFIG_AMDGPU_GART_DEBUGFS + adev->gart.pages[p] = pagelist[i]; ++#endif + if (adev->gart.ptr) { +- page_base = adev->gart.pages_addr[p]; ++ page_base = dma_addr[i]; + for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); + page_base += AMDGPU_GPU_PAGE_SIZE; +@@ -312,11 +312,11 @@ int 
amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, + */ + int amdgpu_gart_init(struct amdgpu_device *adev) + { +- int r, i; ++ int r; + +- if (adev->gart.pages) { ++ if (adev->dummy_page.page) + return 0; +- } ++ + /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */ + if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("Page size is smaller than GPU page size!\n"); +@@ -330,22 +330,16 @@ int amdgpu_gart_init(struct amdgpu_device *adev) + adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE; + DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", + adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); ++ ++#ifdef CONFIG_AMDGPU_GART_DEBUGFS + /* Allocate pages table */ + adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages); + if (adev->gart.pages == NULL) { + amdgpu_gart_fini(adev); + return -ENOMEM; + } +- adev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) * +- adev->gart.num_cpu_pages); +- if (adev->gart.pages_addr == NULL) { +- amdgpu_gart_fini(adev); +- return -ENOMEM; +- } +- /* set GART entry to point to the dummy page by default */ +- for (i = 0; i < adev->gart.num_cpu_pages; i++) { +- adev->gart.pages_addr[i] = adev->dummy_page.addr; +- } ++#endif ++ + return 0; + } + +@@ -358,15 +352,14 @@ int amdgpu_gart_init(struct amdgpu_device *adev) + */ + void amdgpu_gart_fini(struct amdgpu_device *adev) + { +- if (adev->gart.pages && adev->gart.pages_addr && adev->gart.ready) { ++ if (adev->gart.ready) { + /* unbind pages */ + amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages); + } + adev->gart.ready = false; ++#ifdef CONFIG_AMDGPU_GART_DEBUGFS + vfree(adev->gart.pages); +- vfree(adev->gart.pages_addr); + adev->gart.pages = NULL; +- adev->gart.pages_addr = NULL; +- ++#endif + amdgpu_dummy_page_fini(adev); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +index c3f4e85..503d540 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h 
+@@ -43,7 +43,7 @@ struct amdgpu_ring; + struct amdgpu_bo; + + struct amdgpu_gds_asic_info { +- uint32_t total_size; ++ uint32_t total_size; + uint32_t gfx_partition_size; + uint32_t cs_partition_size; + }; +@@ -52,8 +52,8 @@ struct amdgpu_gds { + struct amdgpu_gds_asic_info mem; + struct amdgpu_gds_asic_info gws; + struct amdgpu_gds_asic_info oa; +- /* At present, GDS, GWS and OA resources for gfx (graphics) +- * is always pre-allocated and available for graphics operation. ++ /* At present, GDS, GWS and OA resources for gfx (graphics) ++ * is always pre-allocated and available for graphics operation. + * Such resource is shared between all gfx clients. + * TODO: move this operation to user space + * */ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +index 3f8997a..0635bb6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +@@ -141,25 +141,40 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri + void amdgpu_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv) + { +- struct amdgpu_bo *rbo = gem_to_amdgpu_bo(obj); +- struct amdgpu_device *adev = rbo->adev; ++ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); ++ struct amdgpu_device *adev = bo->adev; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; ++ ++ struct amdgpu_bo_list_entry vm_pd; ++ struct list_head list, duplicates; ++ struct ttm_validate_buffer tv; ++ struct ww_acquire_ctx ticket; + struct amdgpu_bo_va *bo_va; + int r; +- r = amdgpu_bo_reserve(rbo, true); ++ ++ INIT_LIST_HEAD(&list); ++ INIT_LIST_HEAD(&duplicates); ++ ++ tv.bo = &bo->tbo; ++ tv.shared = true; ++ list_add(&tv.head, &list); ++ ++ amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); ++ ++ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); + if (r) { + dev_err(adev->dev, "leaking bo va because " + "we fail to reserve bo (%d)\n", r); + return; + } +- bo_va 
= amdgpu_vm_bo_find(vm, rbo); ++ bo_va = amdgpu_vm_bo_find(vm, bo); + if (bo_va) { + if (--bo_va->ref_count == 0) { + amdgpu_vm_bo_rmv(adev, bo_va); + } + } +- amdgpu_bo_unreserve(rbo); ++ ttm_eu_backoff_reservation(&ticket, &list); + } + + static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) +@@ -579,11 +594,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, + tv.shared = true; + list_add(&tv.head, &list); + +- if (args->operation == AMDGPU_VA_OP_MAP) { +- tv_pd.bo = &fpriv->vm.page_directory->tbo; +- tv_pd.shared = true; +- list_add(&tv_pd.head, &list); +- } ++ tv_pd.bo = &fpriv->vm.page_directory->tbo; ++ tv_pd.shared = true; ++ list_add(&tv_pd.head, &list); ++ + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); + if (r) { + drm_gem_object_unreference_unlocked(gobj); +@@ -783,7 +797,7 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) + return 0; + } + +-static struct drm_info_list amdgpu_debugfs_gem_list[] = { ++static const struct drm_info_list amdgpu_debugfs_gem_list[] = { + {"amdgpu_gem_info", &amdgpu_debugfs_gem_info, 0, NULL}, + }; + #endif +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +index 644336d..34e3542 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +@@ -74,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); + } + +- ib->vm = vm; +- ib->vm_id = 0; +- + return 0; + } + +@@ -89,7 +86,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, + * + * Free an IB (all asics). 
+ */ +-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f) ++void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, ++ struct fence *f) + { + amdgpu_sa_bo_free(adev, &ib->sa_bo, f); + } +@@ -117,29 +115,37 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fen + */ + int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_ib *ibs, struct fence *last_vm_update, +- struct fence **f) ++ struct amdgpu_job *job, struct fence **f) + { + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib *ib = &ibs[0]; +- struct amdgpu_ctx *ctx, *old_ctx; ++ bool skip_preamble, need_ctx_switch; ++ unsigned patch_offset = ~0; + struct amdgpu_vm *vm; + struct fence *hwf; +- unsigned i, patch_offset = ~0; ++ uint64_t ctx; + ++ unsigned i; + int r = 0; + + if (num_ibs == 0) + return -EINVAL; + +- ctx = ibs->ctx; +- vm = ibs->vm; ++ /* ring tests don't use a job */ ++ if (job) { ++ vm = job->vm; ++ ctx = job->ctx; ++ } else { ++ vm = NULL; ++ ctx = 0; ++ } + + if (!ring->ready) { + dev_err(adev->dev, "couldn't schedule ib\n"); + return -EINVAL; + } + +- if (vm && !ibs->vm_id) { ++ if (vm && !job->vm_id) { + dev_err(adev->dev, "VM IB without ID\n"); + return -EINVAL; + } +@@ -154,54 +160,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + patch_offset = amdgpu_ring_init_cond_exec(ring); + + if (vm) { +- /* do context switch */ +- amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, +- ib->gds_base, ib->gds_size, +- ib->gws_base, ib->gws_size, +- ib->oa_base, ib->oa_size); +- +- if (ring->funcs->emit_hdp_flush) +- amdgpu_ring_emit_hdp_flush(ring); ++ r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, ++ job->gds_base, job->gds_size, ++ job->gws_base, job->gws_size, ++ job->oa_base, job->oa_size); ++ if (r) { ++ amdgpu_ring_undo(ring); ++ return r; ++ } + } + ++ if (ring->funcs->emit_hdp_flush) ++ amdgpu_ring_emit_hdp_flush(ring); ++ + /* always set 
cond_exec_polling to CONTINUE */ + *ring->cond_exe_cpu_addr = 1; + +- old_ctx = ring->current_ctx; ++ skip_preamble = ring->current_ctx == ctx; ++ need_ctx_switch = ring->current_ctx != ctx; + for (i = 0; i < num_ibs; ++i) { + ib = &ibs[i]; + +- if (ib->ctx != ctx || ib->vm != vm) { +- ring->current_ctx = old_ctx; +- if (ib->vm_id) +- amdgpu_vm_reset_id(adev, ib->vm_id); +- amdgpu_ring_undo(ring); +- return -EINVAL; +- } +- amdgpu_ring_emit_ib(ring, ib); +- ring->current_ctx = ctx; +- } ++ /* drop preamble IBs if we don't have a context switch */ ++ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) ++ continue; + +- if (vm) { +- if (ring->funcs->emit_hdp_invalidate) +- amdgpu_ring_emit_hdp_invalidate(ring); ++ amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, ++ need_ctx_switch); ++ need_ctx_switch = false; + } + ++ if (ring->funcs->emit_hdp_invalidate) ++ amdgpu_ring_emit_hdp_invalidate(ring); ++ + r = amdgpu_fence_emit(ring, &hwf); + if (r) { + dev_err(adev->dev, "failed to emit fence (%d)\n", r); +- ring->current_ctx = old_ctx; +- if (ib->vm_id) +- amdgpu_vm_reset_id(adev, ib->vm_id); ++ if (job && job->vm_id) ++ amdgpu_vm_reset_id(adev, job->vm_id); + amdgpu_ring_undo(ring); + return r; + } + + /* wrap the last IB with fence */ +- if (ib->user) { +- uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); +- addr += ib->user->offset; +- amdgpu_ring_emit_fence(ring, addr, ib->sequence, ++ if (job && job->uf_bo) { ++ uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo); ++ ++ addr += job->uf_offset; ++ amdgpu_ring_emit_fence(ring, addr, job->uf_sequence, + AMDGPU_FENCE_FLAG_64BIT); + } + +@@ -211,6 +217,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + if (patch_offset != ~0 && ring->funcs->patch_cond_exec) + amdgpu_ring_patch_cond_exec(ring, patch_offset); + ++ ring->current_ctx = ctx; + amdgpu_ring_commit(ring); + return 0; + } +@@ -325,7 +332,7 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data) + + } + +-static 
struct drm_info_list amdgpu_debugfs_sa_list[] = { ++static const struct drm_info_list amdgpu_debugfs_sa_list[] = { + {"amdgpu_sa_info", &amdgpu_debugfs_sa_info, 0, NULL}, + }; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +index 04ded38..8d34ccd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +@@ -25,6 +25,7 @@ + * Alex Deucher + * Jerome Glisse + */ ++#include <linux/irq.h> + #include <drm/drmP.h> + #include <drm/drm_crtc_helper.h> + #include <drm/amdgpu_drm.h> +@@ -239,6 +240,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) + INIT_WORK(&adev->hotplug_work, + amdgpu_hotplug_work_func); + } ++ adev->ddev->vblank_disable_allowed = true; + + INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); + +@@ -505,7 +507,7 @@ static int amdgpu_irqdomain_map(struct irq_domain *d, + return 0; + } + +-static struct irq_domain_ops amdgpu_hw_irqdomain_ops = { ++static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = { + .map = amdgpu_irqdomain_map, + }; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +index a052ac2..23c8c84 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -46,7 +46,7 @@ void amdgpu_job_timeout_func(struct work_struct *work) + } + + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, +- struct amdgpu_job **job) ++ struct amdgpu_job **job, struct amdgpu_vm *vm) + { + size_t size = sizeof(struct amdgpu_job); + +@@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, + return -ENOMEM; + + (*job)->adev = adev; ++ (*job)->vm = vm; + (*job)->ibs = (void *)&(*job)[1]; + (*job)->num_ibs = num_ibs; + INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler); +@@ -74,7 +75,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, + { + int r; + +- r = amdgpu_job_alloc(adev, 1, job); ++ r 
= amdgpu_job_alloc(adev, 1, job, NULL); + if (r) + return r; + +@@ -96,7 +97,7 @@ void amdgpu_job_free(struct amdgpu_job *job) + amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f); + fence_put(job->fence); + +- amdgpu_bo_unref(&job->uf.bo); ++ amdgpu_bo_unref(&job->uf_bo); + amdgpu_sync_free(&job->sync); + + if (!job->base.use_sched) +@@ -121,14 +122,13 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, + return -EINVAL; + + r = amd_sched_job_init(&job->base, &ring->sched, +- entity, +- amdgpu_job_timeout_func, +- amdgpu_job_free_func, +- owner, &fence); ++ entity, amdgpu_job_timeout_func, ++ amdgpu_job_free_func, owner, &fence); + if (r) + return r; + + job->owner = owner; ++ job->ctx = entity->fence_context; + *f = fence_get(fence); + amd_sched_entity_push_job(&job->base); + +@@ -138,27 +138,19 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, + static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) + { + struct amdgpu_job *job = to_amdgpu_job(sched_job); +- struct amdgpu_vm *vm = job->ibs->vm; ++ struct amdgpu_vm *vm = job->vm; + + struct fence *fence = amdgpu_sync_get_fence(&job->sync); + +- if (fence == NULL && vm && !job->ibs->vm_id) { ++ if (fence == NULL && vm && !job->vm_id) { + struct amdgpu_ring *ring = job->ring; +- unsigned i, vm_id; +- uint64_t vm_pd_addr; + int r; + + r = amdgpu_vm_grab_id(vm, ring, &job->sync, + &job->base.s_fence->base, +- &vm_id, &vm_pd_addr); ++ &job->vm_id, &job->vm_pd_addr); + if (r) + DRM_ERROR("Error getting VM ID (%d)\n", r); +- else { +- for (i = 0; i < job->num_ibs; ++i) { +- job->ibs[i].vm_id = vm_id; +- job->ibs[i].vm_pd_addr = vm_pd_addr; +- } +- } + + fence = amdgpu_sync_get_fence(&job->sync); + } +@@ -186,7 +178,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) + + trace_amdgpu_sched_run_job(job); + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, +- job->sync.last_vm_update, &fence); ++ job->sync.last_vm_update, 
job, &fence); + if (r) { + DRM_ERROR("Error scheduling IBs (%d)\n", r); + goto err; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 45d3b6a..bf327c6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + fw_info.feature = adev->vce.fb_version; + break; + case AMDGPU_INFO_FW_UVD: +- fw_info.ver = 0; ++ fw_info.ver = adev->uvd.fw_version; + fw_info.feature = 0; + break; + case AMDGPU_INFO_FW_GMC: +@@ -382,8 +382,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + struct drm_amdgpu_info_vram_gtt vram_gtt; + + vram_gtt.vram_size = adev->mc.real_vram_size; ++ vram_gtt.vram_size -= adev->vram_pin_size; + vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; +- vram_gtt.vram_cpu_accessible_size -= adev->vram_pin_size; ++ vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); + vram_gtt.gtt_size = adev->mc.gtt_size; + vram_gtt.gtt_size -= adev->gart_pin_size; + return copy_to_user(out, &vram_gtt, +@@ -426,7 +427,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + } + case AMDGPU_INFO_DEV_INFO: { + struct drm_amdgpu_info_device dev_info = {}; +- struct amdgpu_cu_info cu_info; + + dev_info.device_id = dev->pdev->device; + dev_info.chip_rev = adev->rev_id; +@@ -460,11 +460,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + AMDGPU_GPU_PAGE_SIZE; + dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; + +- amdgpu_asic_get_cu_info(adev, &cu_info); +- dev_info.cu_active_number = cu_info.number; +- dev_info.cu_ao_mask = cu_info.ao_cu_mask; ++ dev_info.cu_active_number = adev->gfx.cu_info.number; ++ dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; + dev_info.ce_ram_size = adev->gfx.ce_ram_size; +- memcpy(&dev_info.cu_bitmap[0], 
&cu_info.bitmap[0], sizeof(cu_info.bitmap)); ++ memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], ++ sizeof(adev->gfx.cu_info.bitmap)); + dev_info.vram_type = adev->mc.vram_type; + dev_info.vram_bit_width = adev->mc.vram_width; + dev_info.vce_harvest_config = adev->vce.harvest_config; +@@ -753,4 +753,4 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + }; +-int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); ++const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +index 151a2d4..7ecea83 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +@@ -424,9 +424,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, + bo->pin_count = 1; + if (gpu_addr != NULL) + *gpu_addr = amdgpu_bo_gpu_offset(bo); +- if (domain == AMDGPU_GEM_DOMAIN_VRAM) ++ if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + bo->adev->vram_pin_size += amdgpu_bo_size(bo); +- else ++ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) ++ bo->adev->invisible_pin_size += amdgpu_bo_size(bo); ++ } else + bo->adev->gart_pin_size += amdgpu_bo_size(bo); + } else { + dev_err(bo->adev->dev, "%p pin failed\n", bo); +@@ -456,9 +458,11 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) + } + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (likely(r == 0)) { +- if (bo->tbo.mem.mem_type == TTM_PL_VRAM) ++ if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { + bo->adev->vram_pin_size -= amdgpu_bo_size(bo); +- else ++ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) ++ bo->adev->invisible_pin_size -= amdgpu_bo_size(bo); ++ } else + bo->adev->gart_pin_size -= amdgpu_bo_size(bo); + } else { + dev_err(bo->adev->dev, "%p validate failed for unpin\n", bo); +@@ 
-476,6 +480,17 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) + return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); + } + ++static const char *amdgpu_vram_names[] = { ++ "UNKNOWN", ++ "GDDR1", ++ "DDR2", ++ "GDDR3", ++ "GDDR4", ++ "GDDR5", ++ "HBM", ++ "DDR3" ++}; ++ + int amdgpu_bo_init(struct amdgpu_device *adev) + { + /* Add an MTRR for the VRAM */ +@@ -484,8 +499,8 @@ int amdgpu_bo_init(struct amdgpu_device *adev) + DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", + adev->mc.mc_vram_size >> 20, + (unsigned long long)adev->mc.aper_size >> 20); +- DRM_INFO("RAM width %dbits DDR\n", +- adev->mc.vram_width); ++ DRM_INFO("RAM width %dbits %s\n", ++ adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]); + return amdgpu_ttm_init(adev); + } + +@@ -526,6 +541,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, + if (!metadata_size) { + if (bo->metadata_size) { + kfree(bo->metadata); ++ bo->metadata = NULL; + bo->metadata_size = 0; + } + return 0; +@@ -608,6 +624,10 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) + if ((offset + size) <= adev->mc.visible_vram_size) + return 0; + ++ /* Can't move a pinned BO to visible VRAM */ ++ if (abo->pin_count > 0) ++ return -EINVAL; ++ + /* hurrah the memory is not visible ! 
*/ + amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); + lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +index be6388f..7700dc2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +@@ -57,9 +57,10 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) + ttm_bo_kunmap(&bo->dma_buf_vmap); + } + +-struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, +- struct dma_buf_attachment *attach, +- struct sg_table *sg) ++struct drm_gem_object * ++amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ++ struct dma_buf_attachment *attach, ++ struct sg_table *sg) + { + struct reservation_object *resv = attach->dmabuf->resv; + struct amdgpu_device *adev = dev->dev_private; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +index dd79243..1b0b7ae 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +@@ -28,6 +28,7 @@ + */ + #include <linux/seq_file.h> + #include <linux/slab.h> ++#include <linux/debugfs.h> + #include <drm/drmP.h> + #include <drm/amdgpu_drm.h> + #include "amdgpu.h" +@@ -46,7 +47,8 @@ + * wptr. The GPU then starts fetching commands and executes + * them until the pointers are equal again. 
+ */ +-static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); ++static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, ++ struct amdgpu_ring *ring); + + /** + * amdgpu_ring_alloc - allocate space on the ring buffer +@@ -215,18 +217,17 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring, + * + * @adev: amdgpu_device pointer + * @ring: amdgpu_ring structure holding ring information +- * @ring_size: size of the ring ++ * @max_ndw: maximum number of dw for ring alloc + * @nop: nop packet for this ring + * + * Initialize the driver information for the selected ring (all asics). + * Returns 0 on success, error on failure. + */ + int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, +- unsigned ring_size, u32 nop, u32 align_mask, ++ unsigned max_dw, u32 nop, u32 align_mask, + struct amdgpu_irq_src *irq_src, unsigned irq_type, + enum amdgpu_ring_type ring_type) + { +- u32 rb_bufsz; + int r; + + if (ring->adev == NULL) { +@@ -265,7 +266,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, + dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r); + return r; + } +- ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4); ++ ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4; + ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs]; + + r = amdgpu_wb_get(adev, &ring->cond_exe_offs); +@@ -283,10 +284,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, + return r; + } + +- /* Align ring size */ +- rb_bufsz = order_base_2(ring_size / 8); +- ring_size = (1 << (rb_bufsz + 1)) * 4; +- ring->ring_size = ring_size; ++ ring->ring_size = roundup_pow_of_two(max_dw * 4 * ++ amdgpu_sched_hw_submission); + ring->align_mask = align_mask; + ring->nop = nop; + ring->type = ring_type; +@@ -319,8 +318,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, + } + } + ring->ptr_mask = (ring->ring_size 
/ 4) - 1; +- ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4, +- amdgpu_sched_hw_submission); ++ ring->max_dw = max_dw; + + if (amdgpu_debugfs_ring_init(adev, ring)) { + DRM_ERROR("Failed to register debugfs file for rings !\n"); +@@ -367,96 +365,82 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) + */ + #if defined(CONFIG_DEBUG_FS) + +-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) ++/* Layout of file is 12 bytes consisting of ++ * - rptr ++ * - wptr ++ * - driver's copy of wptr ++ * ++ * followed by n-words of ring data ++ */ ++static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, ++ size_t size, loff_t *pos) + { +- struct drm_info_node *node = (struct drm_info_node *) m->private; +- struct drm_device *dev = node->minor->dev; +- struct amdgpu_device *adev = dev->dev_private; +- int roffset = *(int*)node->info_ent->data; +- struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset); +- +- uint32_t rptr, wptr, rptr_next; +- unsigned i; +- +- wptr = amdgpu_ring_get_wptr(ring); +- seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr); +- +- rptr = amdgpu_ring_get_rptr(ring); +- rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr); +- +- seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr); +- +- seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", +- ring->wptr, ring->wptr); +- +- if (!ring->ready) +- return 0; +- +- /* print 8 dw before current rptr as often it's the last executed +- * packet that is the root issue +- */ +- i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; +- while (i != rptr) { +- seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); +- if (i == rptr) +- seq_puts(m, " *"); +- if (i == rptr_next) +- seq_puts(m, " #"); +- seq_puts(m, "\n"); +- i = (i + 1) & ring->ptr_mask; ++ struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private; ++ int r, i; ++ uint32_t value, result, early[3]; ++ ++ if (*pos & 3 || size & 3) ++ return -EINVAL; ++ ++ result = 0; ++ ++ if (*pos < 12) { ++ early[0] = 
amdgpu_ring_get_rptr(ring); ++ early[1] = amdgpu_ring_get_wptr(ring); ++ early[2] = ring->wptr; ++ for (i = *pos / 4; i < 3 && size; i++) { ++ r = put_user(early[i], (uint32_t *)buf); ++ if (r) ++ return r; ++ buf += 4; ++ result += 4; ++ size -= 4; ++ *pos += 4; ++ } + } +- while (i != wptr) { +- seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); +- if (i == rptr) +- seq_puts(m, " *"); +- if (i == rptr_next) +- seq_puts(m, " #"); +- seq_puts(m, "\n"); +- i = (i + 1) & ring->ptr_mask; ++ ++ while (size) { ++ if (*pos >= (ring->ring_size + 12)) ++ return result; ++ ++ value = ring->ring[(*pos - 12)/4]; ++ r = put_user(value, (uint32_t*)buf); ++ if (r) ++ return r; ++ buf += 4; ++ result += 4; ++ size -= 4; ++ *pos += 4; + } +- return 0; ++ ++ return result; + } + +-/* TODO: clean this up !*/ +-static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]); +-static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]); +-static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]); +-static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma.instance[0].ring); +-static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma.instance[1].ring); +-static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring); +-static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]); +-static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]); +- +-static struct drm_info_list amdgpu_debugfs_ring_info_list[] = { +- {"amdgpu_ring_gfx", amdgpu_debugfs_ring_info, 0, &amdgpu_gfx_index}, +- {"amdgpu_ring_cp1", amdgpu_debugfs_ring_info, 0, &cayman_cp1_index}, +- {"amdgpu_ring_cp2", amdgpu_debugfs_ring_info, 0, &cayman_cp2_index}, +- {"amdgpu_ring_dma1", amdgpu_debugfs_ring_info, 0, &amdgpu_dma1_index}, +- {"amdgpu_ring_dma2", amdgpu_debugfs_ring_info, 0, &amdgpu_dma2_index}, +- {"amdgpu_ring_uvd", amdgpu_debugfs_ring_info, 0, &r600_uvd_index}, +- {"amdgpu_ring_vce1", 
amdgpu_debugfs_ring_info, 0, &si_vce1_index}, +- {"amdgpu_ring_vce2", amdgpu_debugfs_ring_info, 0, &si_vce2_index}, ++static const struct file_operations amdgpu_debugfs_ring_fops = { ++ .owner = THIS_MODULE, ++ .read = amdgpu_debugfs_ring_read, ++ .llseek = default_llseek + }; + + #endif + +-static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) ++static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, ++ struct amdgpu_ring *ring) + { + #if defined(CONFIG_DEBUG_FS) +- unsigned i; +- for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) { +- struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i]; +- int roffset = *(int*)amdgpu_debugfs_ring_info_list[i].data; +- struct amdgpu_ring *other = (void *)(((uint8_t*)adev) + roffset); +- unsigned r; ++ struct drm_minor *minor = adev->ddev->primary; ++ struct dentry *ent, *root = minor->debugfs_root; ++ char name[32]; + +- if (other != ring) +- continue; ++ sprintf(name, "amdgpu_ring_%s", ring->name); + +- r = amdgpu_debugfs_add_files(adev, info, 1); +- if (r) +- return r; +- } ++ ent = debugfs_create_file(name, ++ S_IFREG | S_IRUGO, root, ++ ring, &amdgpu_debugfs_ring_fops); ++ if (IS_ERR(ent)) ++ return PTR_ERR(ent); ++ ++ i_size_write(ent->d_inode, ring->ring_size + 12); + #endif + return 0; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +index c48b4fc..34a9280 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +@@ -109,6 +109,29 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence) + } + + /** ++ * amdgpu_sync_add_later - add the fence to the hash ++ * ++ * @sync: sync object to add the fence to ++ * @f: fence to add ++ * ++ * Tries to add the fence to an existing hash entry. Returns true when an entry ++ * was found, false otherwise. 
++ */ ++static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f) ++{ ++ struct amdgpu_sync_entry *e; ++ ++ hash_for_each_possible(sync->fences, e, node, f->context) { ++ if (unlikely(e->fence->context != f->context)) ++ continue; ++ ++ amdgpu_sync_keep_later(&e->fence, f); ++ return true; ++ } ++ return false; ++} ++ ++/** + * amdgpu_sync_fence - remember to sync to this fence + * + * @sync: sync object to add fence to +@@ -127,13 +150,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) + amdgpu_sync_keep_later(&sync->last_vm_update, f); + +- hash_for_each_possible(sync->fences, e, node, f->context) { +- if (unlikely(e->fence->context != f->context)) +- continue; +- +- amdgpu_sync_keep_later(&e->fence, f); ++ if (amdgpu_sync_add_later(sync, f)) + return 0; +- } + + e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); + if (!e) +@@ -204,6 +222,81 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, + return r; + } + ++/** ++ * amdgpu_sync_is_idle - test if all fences are signaled ++ * ++ * @sync: the sync object ++ * ++ * Returns true if all fences in the sync object are signaled. ++ */ ++bool amdgpu_sync_is_idle(struct amdgpu_sync *sync) ++{ ++ struct amdgpu_sync_entry *e; ++ struct hlist_node *tmp; ++ int i; ++ ++ hash_for_each_safe(sync->fences, i, tmp, e, node) { ++ struct fence *f = e->fence; ++ ++ if (fence_is_signaled(f)) { ++ hash_del(&e->node); ++ fence_put(f); ++ kmem_cache_free(amdgpu_sync_slab, e); ++ continue; ++ } ++ ++ return false; ++ } ++ ++ return true; ++} ++ ++/** ++ * amdgpu_sync_cycle_fences - move fences from one sync object into another ++ * ++ * @dst: the destination sync object ++ * @src: the source sync object ++ * @fence: fence to add to source ++ * ++ * Remove all fences from source and put them into destination and add ++ * fence as new one into source. 
++ */ ++int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src, ++ struct fence *fence) ++{ ++ struct amdgpu_sync_entry *e, *newone; ++ struct hlist_node *tmp; ++ int i; ++ ++ /* Allocate the new entry before moving the old ones */ ++ newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); ++ if (!newone) ++ return -ENOMEM; ++ ++ hash_for_each_safe(src->fences, i, tmp, e, node) { ++ struct fence *f = e->fence; ++ ++ hash_del(&e->node); ++ if (fence_is_signaled(f)) { ++ fence_put(f); ++ kmem_cache_free(amdgpu_sync_slab, e); ++ continue; ++ } ++ ++ if (amdgpu_sync_add_later(dst, f)) { ++ kmem_cache_free(amdgpu_sync_slab, e); ++ continue; ++ } ++ ++ hash_add(dst->fences, &e->node, f->context); ++ } ++ ++ hash_add(src->fences, &newone->node, fence->context); ++ newone->fence = fence_get(fence); ++ ++ return 0; ++} ++ + struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) + { + struct amdgpu_sync_entry *e; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +index 0f42b1a..3390282 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +@@ -223,6 +223,8 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) + { + struct amdgpu_bo *rbo = container_of(bo, struct amdgpu_bo, tbo); + ++ if (amdgpu_ttm_tt_get_usermm(bo->ttm)) ++ return -EPERM; + return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp); + } + +@@ -384,9 +386,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, + struct ttm_mem_reg *new_mem) + { + struct amdgpu_device *adev; ++ struct amdgpu_bo *abo; + struct ttm_mem_reg *old_mem = &bo->mem; + int r; + ++ /* Can't move a pinned BO */ ++ abo = container_of(bo, struct amdgpu_bo, tbo); ++ if (WARN_ON_ONCE(abo->pin_count > 0)) ++ return -EINVAL; ++ + adev = amdgpu_get_adev(bo->bdev); + if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { + amdgpu_move_null(bo, new_mem); +@@ -921,6 +929,7 @@ 
static struct ttm_bo_driver amdgpu_bo_driver = { + + int amdgpu_ttm_init(struct amdgpu_device *adev) + { ++ unsigned i, j; + int r; + + r = amdgpu_ttm_global_init(adev); +@@ -938,6 +947,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) + DRM_ERROR("failed initializing buffer object driver(%d).\n", r); + return r; + } ++ ++ for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { ++ struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; ++ ++ for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) ++ lru->lru[j] = &adev->mman.bdev.man[j].lru; ++ lru->swap_lru = &adev->mman.bdev.glob->swap_lru; ++ } ++ + adev->mman.initialized = true; + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, + adev->mc.real_vram_size >> PAGE_SHIFT); +@@ -1160,7 +1178,7 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) + static int ttm_pl_vram = TTM_PL_VRAM; + static int ttm_pl_tt = TTM_PL_TT; + +-static struct drm_info_list amdgpu_ttm_debugfs_list[] = { ++static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { + {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, + {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt}, + {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, +@@ -1211,6 +1229,8 @@ static const struct file_operations amdgpu_ttm_vram_fops = { + .llseek = default_llseek + }; + ++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS ++ + static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) + { +@@ -1258,6 +1278,8 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { + + #endif + ++#endif ++ + static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) + { + #if defined(CONFIG_DEBUG_FS) +@@ -1273,6 +1295,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) + i_size_write(ent->d_inode, adev->mc.mc_vram_size); + adev->mman.vram = ent; + ++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root, + adev, &amdgpu_ttm_gtt_fops); + if (IS_ERR(ent)) +@@ -1280,6 +1303,7 @@ static 
int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) + i_size_write(ent->d_inode, adev->mc.gtt_size); + adev->mman.gtt = ent; + ++#endif + count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); + + #ifdef CONFIG_SWIOTLB +@@ -1301,7 +1325,10 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) + debugfs_remove(adev->mman.vram); + adev->mman.vram = NULL; + ++#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + debugfs_remove(adev->mman.gtt); + adev->mman.gtt = NULL; + #endif ++ ++#endif + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +index 917145b..16d58b8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +@@ -41,19 +41,23 @@ + + /* 1 second timeout */ + #define UVD_IDLE_TIMEOUT_MS 1000 ++/* Polaris10/11 firmware version */ ++#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) + + /* Firmware Names */ + #ifdef CONFIG_DRM_AMDGPU_CIK + #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" +-#define FIRMWARE_KABINI "radeon/kabini_uvd.bin" +-#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin" +-#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin" ++#define FIRMWARE_KABINI "radeon/kabini_uvd.bin" ++#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin" ++#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin" + #define FIRMWARE_MULLINS "radeon/mullins_uvd.bin" + #endif + #define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin" + #define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin" + #define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin" + #define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin" ++#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" ++#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" + + /** + * amdgpu_uvd_cs_ctx - Command submission parser context +@@ -85,6 +89,8 @@ MODULE_FIRMWARE(FIRMWARE_TONGA); + MODULE_FIRMWARE(FIRMWARE_CARRIZO); + MODULE_FIRMWARE(FIRMWARE_FIJI); + MODULE_FIRMWARE(FIRMWARE_STONEY); ++MODULE_FIRMWARE(FIRMWARE_POLARIS10); ++MODULE_FIRMWARE(FIRMWARE_POLARIS11); + + static void 
amdgpu_uvd_note_usage(struct amdgpu_device *adev); + static void amdgpu_uvd_idle_work_handler(struct work_struct *work); +@@ -131,6 +137,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) + case CHIP_STONEY: + fw_name = FIRMWARE_STONEY; + break; ++ case CHIP_POLARIS10: ++ fw_name = FIRMWARE_POLARIS10; ++ break; ++ case CHIP_POLARIS11: ++ fw_name = FIRMWARE_POLARIS11; ++ break; + default: + return -EINVAL; + } +@@ -161,6 +173,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) + DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + ++ adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | ++ (family_id << 8)); ++ ++ if ((adev->asic_type == CHIP_POLARIS10 || ++ adev->asic_type == CHIP_POLARIS11) && ++ (adev->uvd.fw_version < FW_1_66_16)) ++ DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", ++ version_major, version_minor); ++ + /* + * Limit the number of UVD handles depending on microcode major + * and minor versions. 
The firmware version which has 40 UVD +@@ -255,32 +276,30 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) + + int amdgpu_uvd_suspend(struct amdgpu_device *adev) + { +- struct amdgpu_ring *ring = &adev->uvd.ring; +- int i, r; ++ unsigned size; ++ void *ptr; ++ int i; + + if (adev->uvd.vcpu_bo == NULL) + return 0; + +- for (i = 0; i < adev->uvd.max_handles; ++i) { +- uint32_t handle = atomic_read(&adev->uvd.handles[i]); +- if (handle != 0) { +- struct fence *fence; ++ for (i = 0; i < adev->uvd.max_handles; ++i) ++ if (atomic_read(&adev->uvd.handles[i])) ++ break; + +- amdgpu_uvd_note_usage(adev); ++ if (i == AMDGPU_MAX_UVD_HANDLES) ++ return 0; + +- r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence); +- if (r) { +- DRM_ERROR("Error destroying UVD (%d)!\n", r); +- continue; +- } ++ cancel_delayed_work_sync(&adev->uvd.idle_work); + +- fence_wait(fence, false); +- fence_put(fence); ++ size = amdgpu_bo_size(adev->uvd.vcpu_bo); ++ ptr = adev->uvd.cpu_addr; + +- adev->uvd.filp[i] = NULL; +- atomic_set(&adev->uvd.handles[i], 0); +- } +- } ++ adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); ++ if (!adev->uvd.saved_bo) ++ return -ENOMEM; ++ ++ memcpy(adev->uvd.saved_bo, ptr, size); + + return 0; + } +@@ -289,23 +308,29 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) + { + unsigned size; + void *ptr; +- const struct common_firmware_header *hdr; +- unsigned offset; + + if (adev->uvd.vcpu_bo == NULL) + return -EINVAL; + +- hdr = (const struct common_firmware_header *)adev->uvd.fw->data; +- offset = le32_to_cpu(hdr->ucode_array_offset_bytes); +- memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset, +- (adev->uvd.fw->size) - offset); +- + size = amdgpu_bo_size(adev->uvd.vcpu_bo); +- size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr = adev->uvd.cpu_addr; +- ptr += le32_to_cpu(hdr->ucode_size_bytes); + +- memset(ptr, 0, size); ++ if (adev->uvd.saved_bo != NULL) { ++ memcpy(ptr, adev->uvd.saved_bo, size); ++ kfree(adev->uvd.saved_bo); ++ adev->uvd.saved_bo = 
NULL; ++ } else { ++ const struct common_firmware_header *hdr; ++ unsigned offset; ++ ++ hdr = (const struct common_firmware_header *)adev->uvd.fw->data; ++ offset = le32_to_cpu(hdr->ucode_array_offset_bytes); ++ memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset, ++ (adev->uvd.fw->size) - offset); ++ size -= le32_to_cpu(hdr->ucode_size_bytes); ++ ptr += le32_to_cpu(hdr->ucode_size_bytes); ++ memset(ptr, 0, size); ++ } + + return 0; + } +@@ -397,7 +422,8 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) + * + * Peek into the decode message and calculate the necessary buffer sizes. + */ +-static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) ++static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, ++ unsigned buf_sizes[]) + { + unsigned stream_type = msg[4]; + unsigned width = msg[6]; +@@ -419,7 +445,6 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) + + switch (stream_type) { + case 0: /* H264 */ +- case 7: /* H264 Perf */ + switch(level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; +@@ -497,6 +522,54 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) + min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); + break; + ++ case 7: /* H264 Perf */ ++ switch(level) { ++ case 30: ++ num_dpb_buffer = 8100 / fs_in_mb; ++ break; ++ case 31: ++ num_dpb_buffer = 18000 / fs_in_mb; ++ break; ++ case 32: ++ num_dpb_buffer = 20480 / fs_in_mb; ++ break; ++ case 41: ++ num_dpb_buffer = 32768 / fs_in_mb; ++ break; ++ case 42: ++ num_dpb_buffer = 34816 / fs_in_mb; ++ break; ++ case 50: ++ num_dpb_buffer = 110400 / fs_in_mb; ++ break; ++ case 51: ++ num_dpb_buffer = 184320 / fs_in_mb; ++ break; ++ default: ++ num_dpb_buffer = 184320 / fs_in_mb; ++ break; ++ } ++ num_dpb_buffer++; ++ if (num_dpb_buffer > 17) ++ num_dpb_buffer = 17; ++ ++ /* reference picture buffer */ ++ min_dpb_size = image_size * num_dpb_buffer; ++ ++ if (adev->asic_type < CHIP_POLARIS10){ ++ 
/* macroblock context buffer */ ++ min_dpb_size += ++ width_in_mb * height_in_mb * num_dpb_buffer * 192; ++ ++ /* IT surface buffer */ ++ min_dpb_size += width_in_mb * height_in_mb * 32; ++ } else { ++ /* macroblock context buffer */ ++ min_ctx_size = ++ width_in_mb * height_in_mb * num_dpb_buffer * 192; ++ } ++ break; ++ + case 16: /* H265 */ + image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; + image_size = ALIGN(image_size, 256); +@@ -592,7 +665,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, + + case 1: + /* it's a decode msg, calc buffer sizes */ +- r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes); ++ r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes); + amdgpu_bo_kunmap(bo); + if (r) + return r; +@@ -613,7 +686,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, + + case 2: + /* it's a destroy msg, free the handle */ +- for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) ++ for (i = 0; i < adev->uvd.max_handles; ++i) + atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); + amdgpu_bo_kunmap(bo); + return 0; +@@ -893,7 +966,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + ib->length_dw = 16; + + if (direct) { +- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); + job->fence = f; + if (r) + goto err_free; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +index 4bec0c1..875626a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +@@ -41,15 +41,17 @@ + /* Firmware Names */ + #ifdef CONFIG_DRM_AMDGPU_CIK + #define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin" +-#define FIRMWARE_KABINI "radeon/kabini_vce.bin" +-#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" +-#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin" ++#define FIRMWARE_KABINI "radeon/kabini_vce.bin" ++#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" ++#define FIRMWARE_HAWAII 
"radeon/hawaii_vce.bin" + #define FIRMWARE_MULLINS "radeon/mullins_vce.bin" + #endif + #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" + #define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin" + #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" + #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" ++#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" ++#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" + + #ifdef CONFIG_DRM_AMDGPU_CIK + MODULE_FIRMWARE(FIRMWARE_BONAIRE); +@@ -62,6 +64,8 @@ MODULE_FIRMWARE(FIRMWARE_TONGA); + MODULE_FIRMWARE(FIRMWARE_CARRIZO); + MODULE_FIRMWARE(FIRMWARE_FIJI); + MODULE_FIRMWARE(FIRMWARE_STONEY); ++MODULE_FIRMWARE(FIRMWARE_POLARIS10); ++MODULE_FIRMWARE(FIRMWARE_POLARIS11); + + static void amdgpu_vce_idle_work_handler(struct work_struct *work); + +@@ -113,6 +117,12 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) + case CHIP_STONEY: + fw_name = FIRMWARE_STONEY; + break; ++ case CHIP_POLARIS10: ++ fw_name = FIRMWARE_POLARIS10; ++ break; ++ case CHIP_POLARIS11: ++ fw_name = FIRMWARE_POLARIS11; ++ break; + + default: + return -EINVAL; +@@ -234,6 +244,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) + if (i == AMDGPU_MAX_VCE_HANDLES) + return 0; + ++ cancel_delayed_work_sync(&adev->vce.idle_work); + /* TODO: suspending running encoding sessions isn't supported */ + return -EINVAL; + } +@@ -425,7 +436,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + +- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); + job->fence = f; + if (r) + goto err; +@@ -487,7 +498,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + ib->ptr[i] = 0x0; + + if (direct) { +- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); + job->fence = f; + if (r) + goto err; +@@ -751,7 +762,8 @@ out: + * @ib: the IB to execute + * + */ 
+-void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) ++void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { + amdgpu_ring_write(ring, VCE_CMD_IB); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +index ef99d23..f40cf76 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +@@ -34,7 +34,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct fence **fence); + void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); + int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); +-void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); ++void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch); + void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); + int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 75154ac..9f36ed3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -53,6 +53,18 @@ + /* Special value that no flush is necessary */ + #define AMDGPU_VM_NO_FLUSH (~0ll) + ++/* Local structure. 
Encapsulate some VM table update parameters to reduce ++ * the number of function parameters ++ */ ++struct amdgpu_vm_update_params { ++ /* address where to copy page table entries from */ ++ uint64_t src; ++ /* DMA addresses to use for mapping */ ++ dma_addr_t *pages_addr; ++ /* indirect buffer to fill with commands */ ++ struct amdgpu_ib *ib; ++}; ++ + /** + * amdgpu_vm_num_pde - return the number of page directory entries + * +@@ -166,88 +178,109 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + { + uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); + struct amdgpu_device *adev = ring->adev; +- struct amdgpu_vm_id *id = &vm->ids[ring->idx]; + struct fence *updates = sync->last_vm_update; ++ struct amdgpu_vm_id *id; ++ unsigned i = ring->idx; + int r; + + mutex_lock(&adev->vm_manager.lock); + +- /* check if the id is still valid */ +- if (id->mgr_id) { +- struct fence *flushed = id->flushed_updates; +- bool is_later; +- long owner; ++ /* Check if we can use a VMID already assigned to this VM */ ++ do { ++ struct fence *flushed; + +- if (!flushed) +- is_later = true; +- else if (!updates) +- is_later = false; +- else +- is_later = fence_is_later(updates, flushed); ++ id = vm->ids[i++]; ++ if (i == AMDGPU_MAX_RINGS) ++ i = 0; + +- owner = atomic_long_read(&id->mgr_id->owner); +- if (!is_later && owner == (long)id && +- pd_addr == id->pd_gpu_addr) { ++ /* Check all the prerequisites to using this VMID */ ++ if (!id) ++ continue; + +- r = amdgpu_sync_fence(ring->adev, sync, +- id->mgr_id->active); +- if (r) { +- mutex_unlock(&adev->vm_manager.lock); +- return r; +- } ++ if (atomic64_read(&id->owner) != vm->client_id) ++ continue; + +- fence_put(id->mgr_id->active); +- id->mgr_id->active = fence_get(fence); ++ if (pd_addr != id->pd_gpu_addr) ++ continue; + +- list_move_tail(&id->mgr_id->list, +- &adev->vm_manager.ids_lru); ++ if (id->last_user != ring && ++ (!id->last_flush || !fence_is_signaled(id->last_flush))) ++ continue; + +- 
*vm_id = id->mgr_id - adev->vm_manager.ids; +- *vm_pd_addr = AMDGPU_VM_NO_FLUSH; +- trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, +- *vm_pd_addr); ++ flushed = id->flushed_updates; ++ if (updates && (!flushed || fence_is_later(updates, flushed))) ++ continue; + +- mutex_unlock(&adev->vm_manager.lock); +- return 0; ++ /* Good we can use this VMID */ ++ if (id->last_user == ring) { ++ r = amdgpu_sync_fence(ring->adev, sync, ++ id->first); ++ if (r) ++ goto error; + } +- } + +- id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, +- struct amdgpu_vm_manager_id, +- list); ++ /* And remember this submission as user of the VMID */ ++ r = amdgpu_sync_fence(ring->adev, &id->active, fence); ++ if (r) ++ goto error; + +- if (id->mgr_id->active && !fence_is_signaled(id->mgr_id->active)) { +- struct amdgpu_vm_manager_id *mgr_id, *tmp; ++ list_move_tail(&id->list, &adev->vm_manager.ids_lru); ++ vm->ids[ring->idx] = id; ++ ++ *vm_id = id - adev->vm_manager.ids; ++ *vm_pd_addr = AMDGPU_VM_NO_FLUSH; ++ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); ++ ++ mutex_unlock(&adev->vm_manager.lock); ++ return 0; ++ ++ } while (i != ring->idx); ++ ++ id = list_first_entry(&adev->vm_manager.ids_lru, ++ struct amdgpu_vm_id, ++ list); ++ ++ if (!amdgpu_sync_is_idle(&id->active)) { + struct list_head *head = &adev->vm_manager.ids_lru; +- list_for_each_entry_safe(mgr_id, tmp, &adev->vm_manager.ids_lru, list) { +- if (mgr_id->active && fence_is_signaled(mgr_id->active)) { +- list_move(&mgr_id->list, head); +- head = &mgr_id->list; ++ struct amdgpu_vm_id *tmp; ++ ++ list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru, ++ list) { ++ if (amdgpu_sync_is_idle(&id->active)) { ++ list_move(&id->list, head); ++ head = &id->list; + } + } +- id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, +- struct amdgpu_vm_manager_id, +- list); ++ id = list_first_entry(&adev->vm_manager.ids_lru, ++ struct amdgpu_vm_id, ++ list); + } + +- r = amdgpu_sync_fence(ring->adev, sync, 
id->mgr_id->active); +- if (!r) { +- fence_put(id->mgr_id->active); +- id->mgr_id->active = fence_get(fence); ++ r = amdgpu_sync_cycle_fences(sync, &id->active, fence); ++ if (r) ++ goto error; ++ ++ fence_put(id->first); ++ id->first = fence_get(fence); + +- fence_put(id->flushed_updates); +- id->flushed_updates = fence_get(updates); ++ fence_put(id->last_flush); ++ id->last_flush = NULL; + +- id->pd_gpu_addr = pd_addr; ++ fence_put(id->flushed_updates); ++ id->flushed_updates = fence_get(updates); + +- list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru); +- atomic_long_set(&id->mgr_id->owner, (long)id); ++ id->pd_gpu_addr = pd_addr; + +- *vm_id = id->mgr_id - adev->vm_manager.ids; +- *vm_pd_addr = pd_addr; +- trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); +- } ++ list_move_tail(&id->list, &adev->vm_manager.ids_lru); ++ id->last_user = ring; ++ atomic64_set(&id->owner, vm->client_id); ++ vm->ids[ring->idx] = id; + ++ *vm_id = id - adev->vm_manager.ids; ++ *vm_pd_addr = pd_addr; ++ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); ++ ++error: + mutex_unlock(&adev->vm_manager.lock); + return r; + } +@@ -261,43 +294,62 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + * + * Emit a VM flush when it is necessary. 
+ */ +-void amdgpu_vm_flush(struct amdgpu_ring *ring, +- unsigned vm_id, uint64_t pd_addr, +- uint32_t gds_base, uint32_t gds_size, +- uint32_t gws_base, uint32_t gws_size, +- uint32_t oa_base, uint32_t oa_size) ++int amdgpu_vm_flush(struct amdgpu_ring *ring, ++ unsigned vm_id, uint64_t pd_addr, ++ uint32_t gds_base, uint32_t gds_size, ++ uint32_t gws_base, uint32_t gws_size, ++ uint32_t oa_base, uint32_t oa_size) + { + struct amdgpu_device *adev = ring->adev; +- struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; ++ struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; + bool gds_switch_needed = ring->funcs->emit_gds_switch && ( +- mgr_id->gds_base != gds_base || +- mgr_id->gds_size != gds_size || +- mgr_id->gws_base != gws_base || +- mgr_id->gws_size != gws_size || +- mgr_id->oa_base != oa_base || +- mgr_id->oa_size != oa_size); ++ id->gds_base != gds_base || ++ id->gds_size != gds_size || ++ id->gws_base != gws_base || ++ id->gws_size != gws_size || ++ id->oa_base != oa_base || ++ id->oa_size != oa_size); ++ int r; + + if (ring->funcs->emit_pipeline_sync && ( +- pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed)) ++ pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || ++ ring->type == AMDGPU_RING_TYPE_COMPUTE)) + amdgpu_ring_emit_pipeline_sync(ring); + +- if (pd_addr != AMDGPU_VM_NO_FLUSH) { ++ if (ring->funcs->emit_vm_flush && ++ pd_addr != AMDGPU_VM_NO_FLUSH) { ++ struct fence *fence; ++ + trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); + amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); ++ ++ mutex_lock(&adev->vm_manager.lock); ++ if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) { ++ r = amdgpu_fence_emit(ring, &fence); ++ if (r) { ++ mutex_unlock(&adev->vm_manager.lock); ++ return r; ++ } ++ fence_put(id->last_flush); ++ id->last_flush = fence; ++ } ++ mutex_unlock(&adev->vm_manager.lock); + } + + if (gds_switch_needed) { +- mgr_id->gds_base = gds_base; +- mgr_id->gds_size = gds_size; +- mgr_id->gws_base = gws_base; +- 
mgr_id->gws_size = gws_size; +- mgr_id->oa_base = oa_base; +- mgr_id->oa_size = oa_size; ++ id->gds_base = gds_base; ++ id->gds_size = gds_size; ++ id->gws_base = gws_base; ++ id->gws_size = gws_size; ++ id->oa_base = oa_base; ++ id->oa_size = oa_size; + amdgpu_ring_emit_gds_switch(ring, vm_id, + gds_base, gds_size, + gws_base, gws_size, + oa_base, oa_size); + } ++ ++ return 0; + } + + /** +@@ -310,14 +362,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, + */ + void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) + { +- struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; +- +- mgr_id->gds_base = 0; +- mgr_id->gds_size = 0; +- mgr_id->gws_base = 0; +- mgr_id->gws_size = 0; +- mgr_id->oa_base = 0; +- mgr_id->oa_size = 0; ++ struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; ++ ++ id->gds_base = 0; ++ id->gds_size = 0; ++ id->gws_base = 0; ++ id->gws_size = 0; ++ id->oa_base = 0; ++ id->oa_size = 0; + } + + /** +@@ -349,9 +401,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, + * amdgpu_vm_update_pages - helper to call the right asic function + * + * @adev: amdgpu_device pointer +- * @gtt: GART instance to use for mapping +- * @gtt_flags: GTT hw access flags +- * @ib: indirect buffer to fill with commands ++ * @vm_update_params: see amdgpu_vm_update_params definition + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update +@@ -362,30 +412,29 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, + * to setup the page table using the DMA. 
+ */ + static void amdgpu_vm_update_pages(struct amdgpu_device *adev, +- struct amdgpu_gart *gtt, +- uint32_t gtt_flags, +- struct amdgpu_ib *ib, ++ struct amdgpu_vm_update_params ++ *vm_update_params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint32_t flags) + { + trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); + +- if ((gtt == &adev->gart) && (flags == gtt_flags)) { +- uint64_t src = gtt->table_addr + (addr >> 12) * 8; +- amdgpu_vm_copy_pte(adev, ib, pe, src, count); ++ if (vm_update_params->src) { ++ amdgpu_vm_copy_pte(adev, vm_update_params->ib, ++ pe, (vm_update_params->src + (addr >> 12) * 8), count); + +- } else if (gtt) { +- dma_addr_t *pages_addr = gtt->pages_addr; +- amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr, +- count, incr, flags); ++ } else if (vm_update_params->pages_addr) { ++ amdgpu_vm_write_pte(adev, vm_update_params->ib, ++ vm_update_params->pages_addr, ++ pe, addr, count, incr, flags); + + } else if (count < 3) { +- amdgpu_vm_write_pte(adev, ib, NULL, pe, addr, ++ amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr, + count, incr, flags); + + } else { +- amdgpu_vm_set_pte_pde(adev, ib, pe, addr, ++ amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr, + count, incr, flags); + } + } +@@ -405,10 +454,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + struct amdgpu_ring *ring; + struct fence *fence = NULL; + struct amdgpu_job *job; ++ struct amdgpu_vm_update_params vm_update_params; + unsigned entries; + uint64_t addr; + int r; + ++ memset(&vm_update_params, 0, sizeof(vm_update_params)); + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + + r = reservation_object_reserve_shared(bo->tbo.resv); +@@ -426,7 +477,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + if (r) + goto error; + +- amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries, ++ vm_update_params.ib = &job->ibs[0]; ++ amdgpu_vm_update_pages(adev, &vm_update_params, addr, 
0, entries, + 0, 0); + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + +@@ -499,11 +551,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0, pt_idx, ndw; + struct amdgpu_job *job; +- struct amdgpu_ib *ib; ++ struct amdgpu_vm_update_params vm_update_params; + struct fence *fence = NULL; + + int r; + ++ memset(&vm_update_params, 0, sizeof(vm_update_params)); + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + + /* padding, etc. */ +@@ -516,7 +569,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + if (r) + return r; + +- ib = &job->ibs[0]; ++ vm_update_params.ib = &job->ibs[0]; + + /* walk over the address space and update the page directory */ + for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { +@@ -536,7 +589,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + ((last_pt + incr * count) != pt)) { + + if (count) { +- amdgpu_vm_update_pages(adev, NULL, 0, ib, ++ amdgpu_vm_update_pages(adev, &vm_update_params, + last_pde, last_pt, + count, incr, + AMDGPU_PTE_VALID); +@@ -551,14 +604,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + } + + if (count) +- amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt, +- count, incr, AMDGPU_PTE_VALID); ++ amdgpu_vm_update_pages(adev, &vm_update_params, ++ last_pde, last_pt, ++ count, incr, AMDGPU_PTE_VALID); + +- if (ib->length_dw != 0) { +- amdgpu_ring_pad_ib(ring, ib); ++ if (vm_update_params.ib->length_dw != 0) { ++ amdgpu_ring_pad_ib(ring, vm_update_params.ib); + amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, + AMDGPU_FENCE_OWNER_VM); +- WARN_ON(ib->length_dw > ndw); ++ WARN_ON(vm_update_params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) +@@ -584,18 +638,15 @@ error_free: + * amdgpu_vm_frag_ptes - add fragment information to PTEs + * + * @adev: amdgpu_device pointer +- * @gtt: GART instance to use 
for mapping +- * @gtt_flags: GTT hw mapping flags +- * @ib: IB for the update ++ * @vm_update_params: see amdgpu_vm_update_params definition + * @pe_start: first PTE to handle + * @pe_end: last PTE to handle + * @addr: addr those PTEs should point to + * @flags: hw mapping flags + */ + static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, +- struct amdgpu_gart *gtt, +- uint32_t gtt_flags, +- struct amdgpu_ib *ib, ++ struct amdgpu_vm_update_params ++ *vm_update_params, + uint64_t pe_start, uint64_t pe_end, + uint64_t addr, uint32_t flags) + { +@@ -632,10 +683,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, + return; + + /* system pages are non continuously */ +- if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { ++ if (vm_update_params->src || vm_update_params->pages_addr || ++ !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { + + count = (pe_end - pe_start) / 8; +- amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start, ++ amdgpu_vm_update_pages(adev, vm_update_params, pe_start, + addr, count, AMDGPU_GPU_PAGE_SIZE, + flags); + return; +@@ -644,21 +696,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, + /* handle the 4K area at the beginning */ + if (pe_start != frag_start) { + count = (frag_start - pe_start) / 8; +- amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr, ++ amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr, + count, AMDGPU_GPU_PAGE_SIZE, flags); + addr += AMDGPU_GPU_PAGE_SIZE * count; + } + + /* handle the area in the middle */ + count = (frag_end - frag_start) / 8; +- amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count, ++ amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count, + AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); + + /* handle the 4K area at the end */ + if (frag_end != pe_end) { + addr += AMDGPU_GPU_PAGE_SIZE * count; + count = (pe_end - frag_end) / 8; +- amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr, ++ 
amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr, + count, AMDGPU_GPU_PAGE_SIZE, flags); + } + } +@@ -667,8 +719,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, + * amdgpu_vm_update_ptes - make sure that page tables are valid + * + * @adev: amdgpu_device pointer +- * @gtt: GART instance to use for mapping +- * @gtt_flags: GTT hw mapping flags ++ * @vm_update_params: see amdgpu_vm_update_params definition + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range +@@ -678,10 +729,9 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, + * Update the page tables in the range @start - @end. + */ + static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, +- struct amdgpu_gart *gtt, +- uint32_t gtt_flags, ++ struct amdgpu_vm_update_params ++ *vm_update_params, + struct amdgpu_vm *vm, +- struct amdgpu_ib *ib, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) + { +@@ -707,7 +757,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, + + if (last_pe_end != pe_start) { + +- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, ++ amdgpu_vm_frag_ptes(adev, vm_update_params, + last_pe_start, last_pe_end, + last_dst, flags); + +@@ -722,17 +772,16 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, + dst += nptes * AMDGPU_GPU_PAGE_SIZE; + } + +- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, +- last_pe_start, last_pe_end, +- last_dst, flags); ++ amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start, ++ last_pe_end, last_dst, flags); + } + + /** + * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table + * + * @adev: amdgpu_device pointer +- * @gtt: GART instance to use for mapping +- * @gtt_flags: flags as they are used for GTT ++ * @src: address where to copy page table entries from ++ * @pages_addr: DMA addresses to use for mapping + * @vm: requested vm + * @start: start of mapped range + * @last: last mapped entry +@@ -744,8 +793,8 @@ static void 
amdgpu_vm_update_ptes(struct amdgpu_device *adev, + * Returns 0 for success, -EINVAL for failure. + */ + static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, +- struct amdgpu_gart *gtt, +- uint32_t gtt_flags, ++ uint64_t src, ++ dma_addr_t *pages_addr, + struct amdgpu_vm *vm, + uint64_t start, uint64_t last, + uint32_t flags, uint64_t addr, +@@ -755,11 +804,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + void *owner = AMDGPU_FENCE_OWNER_VM; + unsigned nptes, ncmds, ndw; + struct amdgpu_job *job; +- struct amdgpu_ib *ib; ++ struct amdgpu_vm_update_params vm_update_params; + struct fence *f = NULL; + int r; + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); ++ memset(&vm_update_params, 0, sizeof(vm_update_params)); ++ vm_update_params.src = src; ++ vm_update_params.pages_addr = pages_addr; + + /* sync to everything on unmapping */ + if (!(flags & AMDGPU_PTE_VALID)) +@@ -776,11 +828,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + /* padding, etc. 
*/ + ndw = 64; + +- if ((gtt == &adev->gart) && (flags == gtt_flags)) { ++ if (vm_update_params.src) { + /* only copy commands needed */ + ndw += ncmds * 7; + +- } else if (gtt) { ++ } else if (vm_update_params.pages_addr) { + /* header for write data commands */ + ndw += ncmds * 4; + +@@ -799,7 +851,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + if (r) + return r; + +- ib = &job->ibs[0]; ++ vm_update_params.ib = &job->ibs[0]; + + r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, + owner); +@@ -810,11 +862,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + if (r) + goto error_free; + +- amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1, +- addr, flags); ++ amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start, ++ last + 1, addr, flags); + +- amdgpu_ring_pad_ib(ring, ib); +- WARN_ON(ib->length_dw > ndw); ++ amdgpu_ring_pad_ib(ring, vm_update_params.ib); ++ WARN_ON(vm_update_params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &f); + if (r) +@@ -837,11 +889,12 @@ error_free: + * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks + * + * @adev: amdgpu_device pointer +- * @gtt: GART instance to use for mapping ++ * @gtt_flags: flags as they are used for GTT ++ * @pages_addr: DMA addresses to use for mapping + * @vm: requested vm + * @mapping: mapped range and flags to use for the update + * @addr: addr to set the area to +- * @gtt_flags: flags as they are used for GTT ++ * @flags: HW flags for the mapping + * @fence: optional resulting fence + * + * Split the mapping into smaller chunks so that each update fits +@@ -849,16 +902,16 @@ error_free: + * Returns 0 for success, -EINVAL for failure. 
+ */ + static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, +- struct amdgpu_gart *gtt, + uint32_t gtt_flags, ++ dma_addr_t *pages_addr, + struct amdgpu_vm *vm, + struct amdgpu_bo_va_mapping *mapping, +- uint64_t addr, struct fence **fence) ++ uint32_t flags, uint64_t addr, ++ struct fence **fence) + { + const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE; + +- uint64_t start = mapping->it.start; +- uint32_t flags = gtt_flags; ++ uint64_t src = 0, start = mapping->it.start; + int r; + + /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here +@@ -871,10 +924,15 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + + trace_amdgpu_vm_bo_update(mapping); + ++ if (pages_addr) { ++ if (flags == gtt_flags) ++ src = adev->gart.table_addr + (addr >> 12) * 8; ++ addr = 0; ++ } + addr += mapping->offset; + +- if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags))) +- return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, ++ if (!pages_addr || src) ++ return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, + start, mapping->it.last, + flags, addr, fence); + +@@ -882,7 +940,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + uint64_t last; + + last = min((uint64_t)mapping->it.last, start + max_size - 1); +- r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, ++ r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, + start, last, flags, addr, + fence); + if (r) +@@ -913,16 +971,20 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + { + struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo_va_mapping *mapping; +- struct amdgpu_gart *gtt = NULL; +- uint32_t flags; ++ dma_addr_t *pages_addr = NULL; ++ uint32_t gtt_flags, flags; + uint64_t addr; + int r; + + if (mem) { ++ struct ttm_dma_tt *ttm; ++ + addr = (u64)mem->start << PAGE_SHIFT; + switch (mem->mem_type) { + case TTM_PL_TT: +- gtt = &bo_va->bo->adev->gart; ++ ttm = container_of(bo_va->bo->tbo.ttm, 
struct ++ ttm_dma_tt, ttm); ++ pages_addr = ttm->dma_address; + break; + + case TTM_PL_VRAM: +@@ -937,6 +999,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + } + + flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); ++ gtt_flags = (adev == bo_va->bo->adev) ? flags : 0; + + spin_lock(&vm->status_lock); + if (!list_empty(&bo_va->vm_status)) +@@ -944,7 +1007,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + spin_unlock(&vm->status_lock); + + list_for_each_entry(mapping, &bo_va->invalids, list) { +- r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr, ++ r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm, ++ mapping, flags, addr, + &bo_va->last_pt_update); + if (r) + return r; +@@ -985,22 +1049,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping; + int r; + +- spin_lock(&vm->freed_lock); + while (!list_empty(&vm->freed)) { + mapping = list_first_entry(&vm->freed, + struct amdgpu_bo_va_mapping, list); + list_del(&mapping->list); +- spin_unlock(&vm->freed_lock); +- r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping, +- 0, NULL); ++ ++ r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping, ++ 0, 0, NULL); + kfree(mapping); + if (r) + return r; + +- spin_lock(&vm->freed_lock); + } +- spin_unlock(&vm->freed_lock); +- + return 0; + + } +@@ -1027,9 +1087,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, + bo_va = list_first_entry(&vm->invalidated, + struct amdgpu_bo_va, vm_status); + spin_unlock(&vm->status_lock); +- mutex_lock(&bo_va->mutex); ++ + r = amdgpu_vm_bo_update(adev, bo_va, NULL); +- mutex_unlock(&bo_va->mutex); + if (r) + return r; + +@@ -1073,7 +1132,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, + INIT_LIST_HEAD(&bo_va->valids); + INIT_LIST_HEAD(&bo_va->invalids); + INIT_LIST_HEAD(&bo_va->vm_status); +- mutex_init(&bo_va->mutex); ++ + list_add_tail(&bo_va->bo_list, &bo->va); + + return bo_va; +@@ -1125,9 +1184,7 
@@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, + saddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr /= AMDGPU_GPU_PAGE_SIZE; + +- spin_lock(&vm->it_lock); + it = interval_tree_iter_first(&vm->va, saddr, eaddr); +- spin_unlock(&vm->it_lock); + if (it) { + struct amdgpu_bo_va_mapping *tmp; + tmp = container_of(it, struct amdgpu_bo_va_mapping, it); +@@ -1151,13 +1208,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, + mapping->offset = offset; + mapping->flags = flags; + +- mutex_lock(&bo_va->mutex); + list_add(&mapping->list, &bo_va->invalids); +- mutex_unlock(&bo_va->mutex); +- spin_lock(&vm->it_lock); + interval_tree_insert(&mapping->it, &vm->va); +- spin_unlock(&vm->it_lock); +- trace_amdgpu_vm_bo_map(bo_va, mapping); + + /* Make sure the page tables are allocated */ + saddr >>= amdgpu_vm_block_size; +@@ -1209,9 +1261,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, + + error_free: + list_del(&mapping->list); +- spin_lock(&vm->it_lock); + interval_tree_remove(&mapping->it, &vm->va); +- spin_unlock(&vm->it_lock); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); + kfree(mapping); + +@@ -1240,7 +1290,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, + bool valid = true; + + saddr /= AMDGPU_GPU_PAGE_SIZE; +- mutex_lock(&bo_va->mutex); ++ + list_for_each_entry(mapping, &bo_va->valids, list) { + if (mapping->it.start == saddr) + break; +@@ -1254,25 +1304,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, + break; + } + +- if (&mapping->list == &bo_va->invalids) { +- mutex_unlock(&bo_va->mutex); ++ if (&mapping->list == &bo_va->invalids) + return -ENOENT; +- } + } +- mutex_unlock(&bo_va->mutex); ++ + list_del(&mapping->list); +- spin_lock(&vm->it_lock); + interval_tree_remove(&mapping->it, &vm->va); +- spin_unlock(&vm->it_lock); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); + +- if (valid) { +- spin_lock(&vm->freed_lock); ++ if (valid) + list_add(&mapping->list, &vm->freed); +- spin_unlock(&vm->freed_lock); +- } else { ++ else + kfree(mapping); +- } + + return 0; 
+ } +@@ -1301,23 +1344,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, + + list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { + list_del(&mapping->list); +- spin_lock(&vm->it_lock); + interval_tree_remove(&mapping->it, &vm->va); +- spin_unlock(&vm->it_lock); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); +- spin_lock(&vm->freed_lock); + list_add(&mapping->list, &vm->freed); +- spin_unlock(&vm->freed_lock); + } + list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { + list_del(&mapping->list); +- spin_lock(&vm->it_lock); + interval_tree_remove(&mapping->it, &vm->va); +- spin_unlock(&vm->it_lock); + kfree(mapping); + } ++ + fence_put(bo_va->last_pt_update); +- mutex_destroy(&bo_va->mutex); + kfree(bo_va); + } + +@@ -1361,17 +1398,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) + struct amd_sched_rq *rq; + int i, r; + +- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { +- vm->ids[i].mgr_id = NULL; +- vm->ids[i].flushed_updates = NULL; +- } ++ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) ++ vm->ids[i] = NULL; + vm->va = RB_ROOT; ++ vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); + spin_lock_init(&vm->status_lock); + INIT_LIST_HEAD(&vm->invalidated); + INIT_LIST_HEAD(&vm->cleared); + INIT_LIST_HEAD(&vm->freed); +- spin_lock_init(&vm->it_lock); +- spin_lock_init(&vm->freed_lock); ++ + pd_size = amdgpu_vm_directory_size(adev); + pd_entries = amdgpu_vm_num_pdes(adev); + +@@ -1458,14 +1493,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) + + amdgpu_bo_unref(&vm->page_directory); + fence_put(vm->page_directory_fence); +- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { +- struct amdgpu_vm_id *id = &vm->ids[i]; +- +- if (id->mgr_id) +- atomic_long_cmpxchg(&id->mgr_id->owner, +- (long)id, 0); +- fence_put(id->flushed_updates); +- } + } + + /** +@@ -1484,11 +1511,13 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < 
adev->vm_manager.num_ids; ++i) { + amdgpu_vm_reset_id(adev, i); ++ amdgpu_sync_create(&adev->vm_manager.ids[i].active); + list_add_tail(&adev->vm_manager.ids[i].list, + &adev->vm_manager.ids_lru); + } + + atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); ++ atomic64_set(&adev->vm_manager.client_counter, 0); + } + + /** +@@ -1502,6 +1531,11 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) + { + unsigned i; + +- for (i = 0; i < AMDGPU_NUM_VM; ++i) +- fence_put(adev->vm_manager.ids[i].active); ++ for (i = 0; i < AMDGPU_NUM_VM; ++i) { ++ struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; ++ ++ fence_put(adev->vm_manager.ids[i].first); ++ amdgpu_sync_free(&adev->vm_manager.ids[i].active); ++ fence_put(id->flushed_updates); ++ } + } +diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h +index fece8f4..49daf6d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/atom.h ++++ b/drivers/gpu/drm/amd/amdgpu/atom.h +@@ -92,7 +92,7 @@ + #define ATOM_WS_AND_MASK 0x45 + #define ATOM_WS_FB_WINDOW 0x46 + #define ATOM_WS_ATTRIBUTES 0x47 +-#define ATOM_WS_REGPTR 0x48 ++#define ATOM_WS_REGPTR 0x48 + + #define ATOM_IIO_NOP 0 + #define ATOM_IIO_START 1 +diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +index 49aa350..49a39b1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c ++++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +@@ -461,13 +461,14 @@ union set_pixel_clock { + PIXEL_CLOCK_PARAMETERS_V3 v3; + PIXEL_CLOCK_PARAMETERS_V5 v5; + PIXEL_CLOCK_PARAMETERS_V6 v6; ++ PIXEL_CLOCK_PARAMETERS_V7 v7; + }; + + /* on DCE5, make sure the voltage is high enough to support the + * required disp clk. 
+ */ + void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, +- u32 dispclk) ++ u32 dispclk) + { + u8 frev, crev; + int index; +@@ -510,6 +511,49 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + } + ++union set_dce_clock { ++ SET_DCE_CLOCK_PS_ALLOCATION_V1_1 v1_1; ++ SET_DCE_CLOCK_PS_ALLOCATION_V2_1 v2_1; ++}; ++ ++u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, ++ u32 freq, u8 clk_type, u8 clk_src) ++{ ++ u8 frev, crev; ++ int index; ++ union set_dce_clock args; ++ u32 ret_freq = 0; ++ ++ memset(&args, 0, sizeof(args)); ++ ++ index = GetIndexIntoMasterTable(COMMAND, SetDCEClock); ++ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, ++ &crev)) ++ return 0; ++ ++ switch (frev) { ++ case 2: ++ switch (crev) { ++ case 1: ++ args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */ ++ args.v2_1.asParam.ucDCEClkType = clk_type; ++ args.v2_1.asParam.ucDCEClkSrc = clk_src; ++ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); ++ ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10; ++ break; ++ default: ++ DRM_ERROR("Unknown table version %d %d\n", frev, crev); ++ return 0; ++ } ++ break; ++ default: ++ DRM_ERROR("Unknown table version %d %d\n", frev, crev); ++ return 0; ++ } ++ ++ return ret_freq; ++} ++ + static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id) + { + if (ENCODER_MODE_IS_DP(encoder_mode)) { +@@ -523,18 +567,18 @@ static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id) + } + + void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, +- u32 crtc_id, +- int pll_id, +- u32 encoder_mode, +- u32 encoder_id, +- u32 clock, +- u32 ref_div, +- u32 fb_div, +- u32 frac_fb_div, +- u32 post_div, +- int bpc, +- bool ss_enabled, +- struct amdgpu_atom_ss *ss) ++ u32 crtc_id, ++ int pll_id, ++ u32 encoder_mode, 
++ u32 encoder_id, ++ u32 clock, ++ u32 ref_div, ++ u32 fb_div, ++ u32 frac_fb_div, ++ u32 post_div, ++ int bpc, ++ bool ss_enabled, ++ struct amdgpu_atom_ss *ss) + { + struct drm_device *dev = crtc->dev; + struct amdgpu_device *adev = dev->dev_private; +@@ -652,6 +696,34 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, + args.v6.ucEncoderMode = encoder_mode; + args.v6.ucPpll = pll_id; + break; ++ case 7: ++ args.v7.ulPixelClock = cpu_to_le32(clock * 10); /* 100 hz units */ ++ args.v7.ucMiscInfo = 0; ++ if ((encoder_mode == ATOM_ENCODER_MODE_DVI) && ++ (clock > 165000)) ++ args.v7.ucMiscInfo |= PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN; ++ args.v7.ucCRTC = crtc_id; ++ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) { ++ switch (bpc) { ++ case 8: ++ default: ++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS; ++ break; ++ case 10: ++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4; ++ break; ++ case 12: ++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2; ++ break; ++ case 16: ++ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1; ++ break; ++ } ++ } ++ args.v7.ucTransmitterID = encoder_id; ++ args.v7.ucEncoderMode = encoder_mode; ++ args.v7.ucPpll = pll_id; ++ break; + default: + DRM_ERROR("Unknown table version %d %d\n", frev, crev); + return; +diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h +index c670833..0eeda8e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h ++++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h +@@ -37,6 +37,8 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, + struct drm_display_mode *mode); + void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, + u32 dispclk); ++u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, ++ u32 freq, u8 clk_type, u8 clk_src); + void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, + u32 crtc_id, + int pll_id, +diff --git 
a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +index 1e0bba2..48b6bd6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c ++++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +@@ -298,6 +298,10 @@ bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder, + && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2))) + adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2; + ++ /* vertical FP must be at least 1 */ ++ if (mode->crtc_vsync_start == mode->crtc_vdisplay) ++ adjusted_mode->crtc_vsync_start++; ++ + /* get the native mode for scaling */ + if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) + amdgpu_panel_mode_fixup(encoder, adjusted_mode); +@@ -563,6 +567,7 @@ union dig_encoder_control { + DIG_ENCODER_CONTROL_PARAMETERS_V2 v2; + DIG_ENCODER_CONTROL_PARAMETERS_V3 v3; + DIG_ENCODER_CONTROL_PARAMETERS_V4 v4; ++ DIG_ENCODER_CONTROL_PARAMETERS_V5 v5; + }; + + void +@@ -690,6 +695,47 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder, + else + args.v4.ucHPD_ID = hpd_id + 1; + break; ++ case 5: ++ switch (action) { ++ case ATOM_ENCODER_CMD_SETUP_PANEL_MODE: ++ args.v5.asDPPanelModeParam.ucAction = action; ++ args.v5.asDPPanelModeParam.ucPanelMode = panel_mode; ++ args.v5.asDPPanelModeParam.ucDigId = dig->dig_encoder; ++ break; ++ case ATOM_ENCODER_CMD_STREAM_SETUP: ++ args.v5.asStreamParam.ucAction = action; ++ args.v5.asStreamParam.ucDigId = dig->dig_encoder; ++ args.v5.asStreamParam.ucDigMode = ++ amdgpu_atombios_encoder_get_encoder_mode(encoder); ++ if (ENCODER_MODE_IS_DP(args.v5.asStreamParam.ucDigMode)) ++ args.v5.asStreamParam.ucLaneNum = dp_lane_count; ++ else if (amdgpu_dig_monitor_is_duallink(encoder, ++ amdgpu_encoder->pixel_clock)) ++ args.v5.asStreamParam.ucLaneNum = 8; ++ else ++ args.v5.asStreamParam.ucLaneNum = 4; ++ args.v5.asStreamParam.ulPixelClock = ++ cpu_to_le32(amdgpu_encoder->pixel_clock / 10); ++ 
args.v5.asStreamParam.ucBitPerColor = ++ amdgpu_atombios_encoder_get_bpc(encoder); ++ args.v5.asStreamParam.ucLinkRateIn270Mhz = dp_clock / 27000; ++ break; ++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_START: ++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1: ++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2: ++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3: ++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4: ++ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE: ++ case ATOM_ENCODER_CMD_DP_VIDEO_OFF: ++ case ATOM_ENCODER_CMD_DP_VIDEO_ON: ++ args.v5.asCmdParam.ucAction = action; ++ args.v5.asCmdParam.ucDigId = dig->dig_encoder; ++ break; ++ default: ++ DRM_ERROR("Unsupported action 0x%x\n", action); ++ break; ++ } ++ break; + default: + DRM_ERROR("Unknown table version %d, %d\n", frev, crev); + break; +@@ -710,11 +756,12 @@ union dig_transmitter_control { + DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3; + DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4; + DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5; ++ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6 v6; + }; + + void + amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action, +- uint8_t lane_num, uint8_t lane_set) ++ uint8_t lane_num, uint8_t lane_set) + { + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; +@@ -1066,6 +1113,54 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a + args.v5.ucDigEncoderSel = 1 << dig_encoder; + args.v5.ucDPLaneSet = lane_set; + break; ++ case 6: ++ args.v6.ucAction = action; ++ if (is_dp) ++ args.v6.ulSymClock = cpu_to_le32(dp_clock / 10); ++ else ++ args.v6.ulSymClock = cpu_to_le32(amdgpu_encoder->pixel_clock / 10); ++ ++ switch (amdgpu_encoder->encoder_id) { ++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: ++ if (dig->linkb) ++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYB; ++ else ++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYA; ++ break; ++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: ++ if (dig->linkb) ++ args.v6.ucPhyId = 
ATOM_PHY_ID_UNIPHYD; ++ else ++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYC; ++ break; ++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: ++ if (dig->linkb) ++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYF; ++ else ++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYE; ++ break; ++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: ++ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYG; ++ break; ++ } ++ if (is_dp) ++ args.v6.ucLaneNum = dp_lane_count; ++ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) ++ args.v6.ucLaneNum = 8; ++ else ++ args.v6.ucLaneNum = 4; ++ args.v6.ucConnObjId = connector_object_id; ++ if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) ++ args.v6.ucDPLaneSet = lane_set; ++ else ++ args.v6.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder); ++ ++ if (hpd_id == AMDGPU_HPD_NONE) ++ args.v6.ucHPDSel = 0; ++ else ++ args.v6.ucHPDSel = hpd_id + 1; ++ args.v6.ucDigEncoderSel = 1 << dig_encoder; ++ break; + default: + DRM_ERROR("Unknown table version %d, %d\n", frev, crev); + break; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index f0c7b35..494104e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -27,6 +27,7 @@ + #include "vi.h" + #include "vid.h" + #include "amdgpu_ucode.h" ++#include "amdgpu_atombios.h" + #include "clearstate_vi.h" + + #include "gmc/gmc_8_2_d.h" +@@ -51,6 +52,7 @@ + + #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 + #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 ++#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 + #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 + + #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) +@@ -84,6 +86,8 @@ enum { + BPM_REG_FGCG_MAX + }; + ++#define RLC_FormatDirectRegListLength 14 ++ + MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); + MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); + MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); +@@ -117,6 +121,20 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); + 
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); + MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); + ++MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); ++ ++MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); ++ + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = + { + {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, +@@ -247,6 +265,66 @@ static const u32 tonga_mgcg_cgcg_init[] = + mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, + }; + ++static const u32 golden_settings_polaris11_a11[] = ++{ ++ mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, ++ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, ++ mmDB_DEBUG2, 0xf00fffff, 0x00000400, ++ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, ++ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, ++ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, ++ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, ++ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, ++ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, ++ mmSQ_CONFIG, 0x07f80000, 0x07180000, ++ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, ++ mmTCC_CTRL, 0x00100000, 0xf31fff7f, ++ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, ++ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, ++ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, ++}; ++ ++static const u32 polaris11_golden_common_all[] = ++{ ++ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, ++ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, ++ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, ++ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, ++ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, ++ 
mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, ++}; ++ ++static const u32 golden_settings_polaris10_a11[] = ++{ ++ mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, ++ mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, ++ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, ++ mmDB_DEBUG2, 0xf00fffff, 0x00000400, ++ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, ++ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, ++ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, ++ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, ++ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, ++ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, ++ mmSQ_CONFIG, 0x07f80000, 0x07180000, ++ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, ++ mmTCC_CTRL, 0x00100000, 0xf31fff7f, ++ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, ++ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, ++}; ++ ++static const u32 polaris10_golden_common_all[] = ++{ ++ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, ++ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, ++ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, ++ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, ++ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, ++ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, ++ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, ++ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, ++}; ++ + static const u32 fiji_golden_common_all[] = + { + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, +@@ -527,7 +605,7 @@ static const u32 stoney_golden_settings_a11[] = + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, +- mmTCC_CTRL, 0x00100000, 0xf31fff7f, ++ mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, + mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, + mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, +@@ -558,6 +636,9 @@ static const u32 stoney_mgcg_cgcg_init[] = + static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); + static void gfx_v8_0_set_irq_funcs(struct 
amdgpu_device *adev); + static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); ++static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); ++static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); ++static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); + + static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) + { +@@ -596,6 +677,22 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) + tonga_golden_common_all, + (const u32)ARRAY_SIZE(tonga_golden_common_all)); + break; ++ case CHIP_POLARIS11: ++ amdgpu_program_register_sequence(adev, ++ golden_settings_polaris11_a11, ++ (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); ++ amdgpu_program_register_sequence(adev, ++ polaris11_golden_common_all, ++ (const u32)ARRAY_SIZE(polaris11_golden_common_all)); ++ break; ++ case CHIP_POLARIS10: ++ amdgpu_program_register_sequence(adev, ++ golden_settings_polaris10_a11, ++ (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); ++ amdgpu_program_register_sequence(adev, ++ polaris10_golden_common_all, ++ (const u32)ARRAY_SIZE(polaris10_golden_common_all)); ++ break; + case CHIP_CARRIZO: + amdgpu_program_register_sequence(adev, + cz_mgcg_cgcg_init, +@@ -706,7 +803,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) + ib.ptr[2] = 0xDEADBEEF; + ib.length_dw = 3; + +- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); + if (r) + goto err2; + +@@ -747,6 +844,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; ++ const struct rlc_firmware_header_v2_0 *rlc_hdr; ++ unsigned int *tmp = NULL, i; + + DRM_DEBUG("\n"); + +@@ -763,6 +862,12 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) + case CHIP_FIJI: + chip_name = "fiji"; + break; ++ case CHIP_POLARIS11: ++ chip_name = 
"polaris11"; ++ break; ++ case CHIP_POLARIS10: ++ chip_name = "polaris10"; ++ break; + case CHIP_STONEY: + chip_name = "stoney"; + break; +@@ -808,9 +913,49 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); +- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; +- adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); +- adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); ++ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; ++ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); ++ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); ++ ++ adev->gfx.rlc.save_and_restore_offset = ++ le32_to_cpu(rlc_hdr->save_and_restore_offset); ++ adev->gfx.rlc.clear_state_descriptor_offset = ++ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); ++ adev->gfx.rlc.avail_scratch_ram_locations = ++ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); ++ adev->gfx.rlc.reg_restore_list_size = ++ le32_to_cpu(rlc_hdr->reg_restore_list_size); ++ adev->gfx.rlc.reg_list_format_start = ++ le32_to_cpu(rlc_hdr->reg_list_format_start); ++ adev->gfx.rlc.reg_list_format_separate_start = ++ le32_to_cpu(rlc_hdr->reg_list_format_separate_start); ++ adev->gfx.rlc.starting_offsets_start = ++ le32_to_cpu(rlc_hdr->starting_offsets_start); ++ adev->gfx.rlc.reg_list_format_size_bytes = ++ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); ++ adev->gfx.rlc.reg_list_size_bytes = ++ le32_to_cpu(rlc_hdr->reg_list_size_bytes); ++ ++ adev->gfx.rlc.register_list_format = ++ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + ++ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); ++ ++ if (!adev->gfx.rlc.register_list_format) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ tmp = (unsigned int *)((uintptr_t)rlc_hdr + ++ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); ++ for (i = 0 ; i < 
(rlc_hdr->reg_list_format_size_bytes >> 2); i++) ++ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); ++ ++ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; ++ ++ tmp = (unsigned int *)((uintptr_t)rlc_hdr + ++ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); ++ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) ++ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); +@@ -911,6 +1056,270 @@ out: + return err; + } + ++static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, ++ volatile u32 *buffer) ++{ ++ u32 count = 0, i; ++ const struct cs_section_def *sect = NULL; ++ const struct cs_extent_def *ext = NULL; ++ ++ if (adev->gfx.rlc.cs_data == NULL) ++ return; ++ if (buffer == NULL) ++ return; ++ ++ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); ++ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); ++ ++ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); ++ buffer[count++] = cpu_to_le32(0x80000000); ++ buffer[count++] = cpu_to_le32(0x80000000); ++ ++ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { ++ for (ext = sect->section; ext->extent != NULL; ++ext) { ++ if (sect->id == SECT_CONTEXT) { ++ buffer[count++] = ++ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); ++ buffer[count++] = cpu_to_le32(ext->reg_index - ++ PACKET3_SET_CONTEXT_REG_START); ++ for (i = 0; i < ext->reg_count; i++) ++ buffer[count++] = cpu_to_le32(ext->extent[i]); ++ } else { ++ return; ++ } ++ } ++ } ++ ++ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); ++ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - ++ PACKET3_SET_CONTEXT_REG_START); ++ switch (adev->asic_type) { ++ case CHIP_TONGA: ++ case CHIP_POLARIS10: ++ buffer[count++] = cpu_to_le32(0x16000012); ++ buffer[count++] = 
cpu_to_le32(0x0000002A); ++ break; ++ case CHIP_POLARIS11: ++ buffer[count++] = cpu_to_le32(0x16000012); ++ buffer[count++] = cpu_to_le32(0x00000000); ++ break; ++ case CHIP_FIJI: ++ buffer[count++] = cpu_to_le32(0x3a00161a); ++ buffer[count++] = cpu_to_le32(0x0000002e); ++ break; ++ case CHIP_TOPAZ: ++ case CHIP_CARRIZO: ++ buffer[count++] = cpu_to_le32(0x00000002); ++ buffer[count++] = cpu_to_le32(0x00000000); ++ break; ++ case CHIP_STONEY: ++ buffer[count++] = cpu_to_le32(0x00000000); ++ buffer[count++] = cpu_to_le32(0x00000000); ++ break; ++ default: ++ buffer[count++] = cpu_to_le32(0x00000000); ++ buffer[count++] = cpu_to_le32(0x00000000); ++ break; ++ } ++ ++ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); ++ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); ++ ++ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); ++ buffer[count++] = cpu_to_le32(0); ++} ++ ++static void cz_init_cp_jump_table(struct amdgpu_device *adev) ++{ ++ const __le32 *fw_data; ++ volatile u32 *dst_ptr; ++ int me, i, max_me = 4; ++ u32 bo_offset = 0; ++ u32 table_offset, table_size; ++ ++ if (adev->asic_type == CHIP_CARRIZO) ++ max_me = 5; ++ ++ /* write the cp table buffer */ ++ dst_ptr = adev->gfx.rlc.cp_table_ptr; ++ for (me = 0; me < max_me; me++) { ++ if (me == 0) { ++ const struct gfx_firmware_header_v1_0 *hdr = ++ (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; ++ fw_data = (const __le32 *) ++ (adev->gfx.ce_fw->data + ++ le32_to_cpu(hdr->header.ucode_array_offset_bytes)); ++ table_offset = le32_to_cpu(hdr->jt_offset); ++ table_size = le32_to_cpu(hdr->jt_size); ++ } else if (me == 1) { ++ const struct gfx_firmware_header_v1_0 *hdr = ++ (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; ++ fw_data = (const __le32 *) ++ (adev->gfx.pfp_fw->data + ++ le32_to_cpu(hdr->header.ucode_array_offset_bytes)); ++ table_offset = le32_to_cpu(hdr->jt_offset); ++ table_size = le32_to_cpu(hdr->jt_size); ++ } else if (me == 
2) { ++ const struct gfx_firmware_header_v1_0 *hdr = ++ (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; ++ fw_data = (const __le32 *) ++ (adev->gfx.me_fw->data + ++ le32_to_cpu(hdr->header.ucode_array_offset_bytes)); ++ table_offset = le32_to_cpu(hdr->jt_offset); ++ table_size = le32_to_cpu(hdr->jt_size); ++ } else if (me == 3) { ++ const struct gfx_firmware_header_v1_0 *hdr = ++ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; ++ fw_data = (const __le32 *) ++ (adev->gfx.mec_fw->data + ++ le32_to_cpu(hdr->header.ucode_array_offset_bytes)); ++ table_offset = le32_to_cpu(hdr->jt_offset); ++ table_size = le32_to_cpu(hdr->jt_size); ++ } else if (me == 4) { ++ const struct gfx_firmware_header_v1_0 *hdr = ++ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; ++ fw_data = (const __le32 *) ++ (adev->gfx.mec2_fw->data + ++ le32_to_cpu(hdr->header.ucode_array_offset_bytes)); ++ table_offset = le32_to_cpu(hdr->jt_offset); ++ table_size = le32_to_cpu(hdr->jt_size); ++ } ++ ++ for (i = 0; i < table_size; i ++) { ++ dst_ptr[bo_offset + i] = ++ cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); ++ } ++ ++ bo_offset += table_size; ++ } ++} ++ ++static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) ++{ ++ int r; ++ ++ /* clear state block */ ++ if (adev->gfx.rlc.clear_state_obj) { ++ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); ++ if (unlikely(r != 0)) ++ dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); ++ amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); ++ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); ++ ++ amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); ++ adev->gfx.rlc.clear_state_obj = NULL; ++ } ++ ++ /* jump table block */ ++ if (adev->gfx.rlc.cp_table_obj) { ++ r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); ++ if (unlikely(r != 0)) ++ dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); ++ amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); ++ 
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); ++ ++ amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); ++ adev->gfx.rlc.cp_table_obj = NULL; ++ } ++} ++ ++static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) ++{ ++ volatile u32 *dst_ptr; ++ u32 dws; ++ const struct cs_section_def *cs_data; ++ int r; ++ ++ adev->gfx.rlc.cs_data = vi_cs_data; ++ ++ cs_data = adev->gfx.rlc.cs_data; ++ ++ if (cs_data) { ++ /* clear state block */ ++ adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); ++ ++ if (adev->gfx.rlc.clear_state_obj == NULL) { ++ r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, ++ AMDGPU_GEM_DOMAIN_VRAM, ++ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, ++ NULL, NULL, ++ &adev->gfx.rlc.clear_state_obj); ++ if (r) { ++ dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); ++ gfx_v8_0_rlc_fini(adev); ++ return r; ++ } ++ } ++ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); ++ if (unlikely(r != 0)) { ++ gfx_v8_0_rlc_fini(adev); ++ return r; ++ } ++ r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, ++ &adev->gfx.rlc.clear_state_gpu_addr); ++ if (r) { ++ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); ++ dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); ++ gfx_v8_0_rlc_fini(adev); ++ return r; ++ } ++ ++ r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); ++ if (r) { ++ dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); ++ gfx_v8_0_rlc_fini(adev); ++ return r; ++ } ++ /* set up the cs buffer */ ++ dst_ptr = adev->gfx.rlc.cs_ptr; ++ gfx_v8_0_get_csb_buffer(adev, dst_ptr); ++ amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); ++ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); ++ } ++ ++ if ((adev->asic_type == CHIP_CARRIZO) || ++ (adev->asic_type == CHIP_STONEY)) { ++ adev->gfx.rlc.cp_table_size = (96 * 5 * 4) + (64 * 1024); /* JT + GDS */ ++ if (adev->gfx.rlc.cp_table_obj == NULL) { ++ r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, ++ 
AMDGPU_GEM_DOMAIN_VRAM, ++ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, ++ NULL, NULL, ++ &adev->gfx.rlc.cp_table_obj); ++ if (r) { ++ dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); ++ return r; ++ } ++ } ++ ++ r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); ++ if (unlikely(r != 0)) { ++ dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); ++ return r; ++ } ++ r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, ++ &adev->gfx.rlc.cp_table_gpu_addr); ++ if (r) { ++ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); ++ dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r); ++ return r; ++ } ++ r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); ++ if (r) { ++ dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); ++ return r; ++ } ++ ++ cz_init_cp_jump_table(adev); ++ ++ amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); ++ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); ++ ++ } ++ ++ return 0; ++} ++ + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) + { + int r; +@@ -1262,7 +1671,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* shedule the ib on the ring */ +- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; +@@ -1296,12 +1705,13 @@ fail: + return r; + } + +-static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) ++static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) + { + u32 gb_addr_config; + u32 mc_shared_chmap, mc_arb_ramcfg; + u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; + u32 tmp; ++ int ret; + + switch (adev->asic_type) { + case CHIP_TOPAZ: +@@ -1338,6 +1748,34 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config 
= TONGA_GB_ADDR_CONFIG_GOLDEN; + break; ++ case CHIP_POLARIS11: ++ ret = amdgpu_atombios_get_gfx_info(adev); ++ if (ret) ++ return ret; ++ adev->gfx.config.max_gprs = 256; ++ adev->gfx.config.max_gs_threads = 32; ++ adev->gfx.config.max_hw_contexts = 8; ++ ++ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; ++ adev->gfx.config.sc_prim_fifo_size_backend = 0x100; ++ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; ++ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; ++ gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; ++ break; ++ case CHIP_POLARIS10: ++ ret = amdgpu_atombios_get_gfx_info(adev); ++ if (ret) ++ return ret; ++ adev->gfx.config.max_gprs = 256; ++ adev->gfx.config.max_gs_threads = 32; ++ adev->gfx.config.max_hw_contexts = 8; ++ ++ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; ++ adev->gfx.config.sc_prim_fifo_size_backend = 0x100; ++ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; ++ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; ++ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; ++ break; + case CHIP_TONGA: + adev->gfx.config.max_shader_engines = 4; + adev->gfx.config.max_tile_pipes = 8; +@@ -1520,6 +1958,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) + break; + } + adev->gfx.config.gb_addr_config = gb_addr_config; ++ ++ return 0; + } + + static int gfx_v8_0_sw_init(void *handle) +@@ -1553,6 +1993,12 @@ static int gfx_v8_0_sw_init(void *handle) + return r; + } + ++ r = gfx_v8_0_rlc_init(adev); ++ if (r) { ++ DRM_ERROR("Failed to init rlc BOs!\n"); ++ return r; ++ } ++ + r = gfx_v8_0_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); +@@ -1570,7 +2016,7 @@ static int gfx_v8_0_sw_init(void *handle) + ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; + } + +- r = amdgpu_ring_init(adev, ring, 1024 * 1024, ++ r = amdgpu_ring_init(adev, ring, 1024, + PACKET3(PACKET3_NOP, 0x3FFF), 0xf, + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, + AMDGPU_RING_TYPE_GFX); +@@ -1594,10 +2040,10 @@ static int gfx_v8_0_sw_init(void 
*handle) + ring->me = 1; /* first MEC */ + ring->pipe = i / 8; + ring->queue = i % 8; +- sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); ++ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; + /* type-2 packets are deprecated on MEC, use type-3 instead */ +- r = amdgpu_ring_init(adev, ring, 1024 * 1024, ++ r = amdgpu_ring_init(adev, ring, 1024, + PACKET3(PACKET3_NOP, 0x3FFF), 0xf, + &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_TYPE_COMPUTE); +@@ -1629,7 +2075,9 @@ static int gfx_v8_0_sw_init(void *handle) + + adev->gfx.ce_ram_size = 0x8000; + +- gfx_v8_0_gpu_early_init(adev); ++ r = gfx_v8_0_gpu_early_init(adev); ++ if (r) ++ return r; + + return 0; + } +@@ -1650,6 +2098,10 @@ static int gfx_v8_0_sw_fini(void *handle) + + gfx_v8_0_mec_fini(adev); + ++ gfx_v8_0_rlc_fini(adev); ++ ++ kfree(adev->gfx.rlc.register_list_format); ++ + return 0; + } + +@@ -2219,77 +2671,481 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); + + break; +- case CHIP_STONEY: ++ case CHIP_POLARIS11: + modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) 
| + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | +- PIPE_CONFIG(ADDR_SURF_P2)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16)); + modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | +- PIPE_CONFIG(ADDR_SURF_P2) | +- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | +- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ ++ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[9] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_8_BANK)); ++ ++ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | ++ NUM_BANKS(ADDR_SURF_4_BANK)); ++ ++ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) ++ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); ++ ++ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) ++ if (reg_offset != 7) ++ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); ++ ++ break; ++ case CHIP_POLARIS10: ++ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | ++ 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); ++ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P4_16x16) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ ++ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 
++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | ++ NUM_BANKS(ADDR_SURF_16_BANK)); ++ ++ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | ++ NUM_BANKS(ADDR_SURF_8_BANK)); ++ ++ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | ++ NUM_BANKS(ADDR_SURF_4_BANK)); ++ ++ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | ++ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | ++ 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | ++ NUM_BANKS(ADDR_SURF_4_BANK)); ++ ++ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) ++ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); ++ ++ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) ++ if (reg_offset != 7) ++ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); ++ ++ break; ++ case CHIP_STONEY: ++ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); ++ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | ++ PIPE_CONFIG(ADDR_SURF_P2)); ++ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[10] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); ++ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); ++ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); ++ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | ++ PIPE_CONFIG(ADDR_SURF_P2) | ++ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | ++ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | +@@ -2695,6 +3551,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) + gfx_v8_0_tiling_mode_table_init(adev); + + gfx_v8_0_setup_rb(adev); ++ gfx_v8_0_get_cu_info(adev); + + /* XXX SH_MEM regs */ + /* 
where to put LDS, scratch, GPUVM in FSA64 space */ +@@ -2775,17 +3632,263 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) + } + } + +-static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, +- bool enable) ++static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, ++ bool enable) ++{ ++ u32 tmp = RREG32(mmCP_INT_CNTL_RING0); ++ ++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); ++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); ++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); ++ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); ++ ++ WREG32(mmCP_INT_CNTL_RING0, tmp); ++} ++ ++static void gfx_v8_0_init_csb(struct amdgpu_device *adev) ++{ ++ /* csib */ ++ WREG32(mmRLC_CSIB_ADDR_HI, ++ adev->gfx.rlc.clear_state_gpu_addr >> 32); ++ WREG32(mmRLC_CSIB_ADDR_LO, ++ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); ++ WREG32(mmRLC_CSIB_LENGTH, ++ adev->gfx.rlc.clear_state_size); ++} ++ ++static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, ++ int ind_offset, ++ int list_size, ++ int *unique_indices, ++ int *indices_count, ++ int max_indices, ++ int *ind_start_offsets, ++ int *offset_count, ++ int max_offset) ++{ ++ int indices; ++ bool new_entry = true; ++ ++ for (; ind_offset < list_size; ind_offset++) { ++ ++ if (new_entry) { ++ new_entry = false; ++ ind_start_offsets[*offset_count] = ind_offset; ++ *offset_count = *offset_count + 1; ++ BUG_ON(*offset_count >= max_offset); ++ } ++ ++ if (register_list_format[ind_offset] == 0xFFFFFFFF) { ++ new_entry = true; ++ continue; ++ } ++ ++ ind_offset += 2; ++ ++ /* look for the matching indice */ ++ for (indices = 0; ++ indices < *indices_count; ++ indices++) { ++ if (unique_indices[indices] == ++ register_list_format[ind_offset]) ++ break; ++ } ++ ++ if (indices >= *indices_count) { ++ unique_indices[*indices_count] = 
++ register_list_format[ind_offset]; ++ indices = *indices_count; ++ *indices_count = *indices_count + 1; ++ BUG_ON(*indices_count >= max_indices); ++ } ++ ++ register_list_format[ind_offset] = indices; ++ } ++} ++ ++static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) ++{ ++ int i, temp, data; ++ int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; ++ int indices_count = 0; ++ int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ int offset_count = 0; ++ ++ int list_size; ++ unsigned int *register_list_format = ++ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); ++ if (register_list_format == NULL) ++ return -ENOMEM; ++ memcpy(register_list_format, adev->gfx.rlc.register_list_format, ++ adev->gfx.rlc.reg_list_format_size_bytes); ++ ++ gfx_v8_0_parse_ind_reg_list(register_list_format, ++ RLC_FormatDirectRegListLength, ++ adev->gfx.rlc.reg_list_format_size_bytes >> 2, ++ unique_indices, ++ &indices_count, ++ sizeof(unique_indices) / sizeof(int), ++ indirect_start_offsets, ++ &offset_count, ++ sizeof(indirect_start_offsets)/sizeof(int)); ++ ++ /* save and restore list */ ++ temp = RREG32(mmRLC_SRM_CNTL); ++ temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; ++ WREG32(mmRLC_SRM_CNTL, temp); ++ ++ WREG32(mmRLC_SRM_ARAM_ADDR, 0); ++ for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) ++ WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); ++ ++ /* indirect list */ ++ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); ++ for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) ++ WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); ++ ++ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; ++ list_size = list_size >> 1; ++ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); ++ WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); ++ ++ /* starting offsets starts */ ++ WREG32(mmRLC_GPM_SCRATCH_ADDR, ++ adev->gfx.rlc.starting_offsets_start); ++ for (i = 0; i < 
sizeof(indirect_start_offsets)/sizeof(int); i++) ++ WREG32(mmRLC_GPM_SCRATCH_DATA, ++ indirect_start_offsets[i]); ++ ++ /* unique indices */ ++ temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; ++ data = mmRLC_SRM_INDEX_CNTL_DATA_0; ++ for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { ++ amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); ++ amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); ++ } ++ kfree(register_list_format); ++ ++ return 0; ++} ++ ++static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) ++{ ++ uint32_t data; ++ ++ data = RREG32(mmRLC_SRM_CNTL); ++ data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; ++ WREG32(mmRLC_SRM_CNTL, data); ++} ++ ++static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) ++{ ++ uint32_t data; ++ ++ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | ++ AMD_PG_SUPPORT_GFX_SMG | ++ AMD_PG_SUPPORT_GFX_DMG)) { ++ data = RREG32(mmCP_RB_WPTR_POLL_CNTL); ++ data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; ++ data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); ++ WREG32(mmCP_RB_WPTR_POLL_CNTL, data); ++ ++ data = 0; ++ data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); ++ data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); ++ data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); ++ data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); ++ WREG32(mmRLC_PG_DELAY, data); ++ ++ data = RREG32(mmRLC_PG_DELAY_2); ++ data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; ++ data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); ++ WREG32(mmRLC_PG_DELAY_2, data); ++ ++ data = RREG32(mmRLC_AUTO_PG_CTRL); ++ data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; ++ data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); ++ WREG32(mmRLC_AUTO_PG_CTRL, data); ++ } ++} ++ ++static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, ++ bool enable) + { +- u32 tmp = RREG32(mmCP_INT_CNTL_RING0); ++ u32 data, orig; + +- tmp = 
REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); +- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); +- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); +- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); ++ orig = data = RREG32(mmRLC_PG_CNTL); + +- WREG32(mmCP_INT_CNTL_RING0, tmp); ++ if (enable) ++ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; ++ else ++ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; ++ ++ if (orig != data) ++ WREG32(mmRLC_PG_CNTL, data); ++} ++ ++static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, ++ bool enable) ++{ ++ u32 data, orig; ++ ++ orig = data = RREG32(mmRLC_PG_CNTL); ++ ++ if (enable) ++ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; ++ else ++ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; ++ ++ if (orig != data) ++ WREG32(mmRLC_PG_CNTL, data); ++} ++ ++static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) ++{ ++ u32 data, orig; ++ ++ orig = data = RREG32(mmRLC_PG_CNTL); ++ ++ if (enable) ++ data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; ++ else ++ data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; ++ ++ if (orig != data) ++ WREG32(mmRLC_PG_CNTL, data); ++} ++ ++static void gfx_v8_0_init_pg(struct amdgpu_device *adev) ++{ ++ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | ++ AMD_PG_SUPPORT_GFX_SMG | ++ AMD_PG_SUPPORT_GFX_DMG | ++ AMD_PG_SUPPORT_CP | ++ AMD_PG_SUPPORT_GDS | ++ AMD_PG_SUPPORT_RLC_SMU_HS)) { ++ gfx_v8_0_init_csb(adev); ++ gfx_v8_0_init_save_restore_list(adev); ++ gfx_v8_0_enable_save_restore_machine(adev); ++ ++ if ((adev->asic_type == CHIP_CARRIZO) || ++ (adev->asic_type == CHIP_STONEY)) { ++ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); ++ gfx_v8_0_init_power_gating(adev); ++ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); ++ if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) 
{ ++ cz_enable_sck_slow_down_on_power_up(adev, true); ++ cz_enable_sck_slow_down_on_power_down(adev, true); ++ } else { ++ cz_enable_sck_slow_down_on_power_up(adev, false); ++ cz_enable_sck_slow_down_on_power_down(adev, false); ++ } ++ if (adev->pg_flags & AMD_PG_SUPPORT_CP) ++ cz_enable_cp_power_gating(adev, true); ++ else ++ cz_enable_cp_power_gating(adev, false); ++ } else if (adev->asic_type == CHIP_POLARIS11) { ++ gfx_v8_0_init_power_gating(adev); ++ } ++ } + } + + void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) +@@ -2858,12 +3961,17 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) + + /* disable CG */ + WREG32(mmRLC_CGCG_CGLS_CTRL, 0); ++ if (adev->asic_type == CHIP_POLARIS11 || ++ adev->asic_type == CHIP_POLARIS10) ++ WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0); + + /* disable PG */ + WREG32(mmRLC_PG_CNTL, 0); + + gfx_v8_0_rlc_reset(adev); + ++ gfx_v8_0_init_pg(adev); ++ + if (!adev->pp_enabled) { + if (!adev->firmware.smu_load) { + /* legacy rlc firmware loading */ +@@ -3035,9 +4143,14 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) + amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); + switch (adev->asic_type) { + case CHIP_TONGA: ++ case CHIP_POLARIS10: + amdgpu_ring_write(ring, 0x16000012); + amdgpu_ring_write(ring, 0x0000002A); + break; ++ case CHIP_POLARIS11: ++ amdgpu_ring_write(ring, 0x16000012); ++ amdgpu_ring_write(ring, 0x00000000); ++ break; + case CHIP_FIJI: + amdgpu_ring_write(ring, 0x3a00161a); + amdgpu_ring_write(ring, 0x0000002e); +@@ -3122,6 +4235,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, ++ DOORBELL_HIT, 0); ++ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, +@@ -3679,7 +4794,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device 
*adev) + if (use_doorbell) { + if ((adev->asic_type == CHIP_CARRIZO) || + (adev->asic_type == CHIP_FIJI) || +- (adev->asic_type == CHIP_STONEY)) { ++ (adev->asic_type == CHIP_STONEY) || ++ (adev->asic_type == CHIP_POLARIS11) || ++ (adev->asic_type == CHIP_POLARIS10)) { + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, + AMDGPU_DOORBELL_KIQ << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, +@@ -3713,7 +4830,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); + mqd->cp_hqd_persistent_state = tmp; +- if (adev->asic_type == CHIP_STONEY) { ++ if (adev->asic_type == CHIP_STONEY || ++ adev->asic_type == CHIP_POLARIS11 || ++ adev->asic_type == CHIP_POLARIS10) { + tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); + WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); +@@ -3845,6 +4964,9 @@ static int gfx_v8_0_hw_fini(void *handle) + gfx_v8_0_rlc_stop(adev); + gfx_v8_0_cp_compute_fini(adev); + ++ amdgpu_set_powergating_state(adev, ++ AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); ++ + return 0; + } + +@@ -3889,185 +5011,6 @@ static int gfx_v8_0_wait_for_idle(void *handle) + return -ETIMEDOUT; + } + +-static void gfx_v8_0_print_status(void *handle) +-{ +- int i; +- struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- +- dev_info(adev->dev, "GFX 8.x registers\n"); +- dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", +- RREG32(mmGRBM_STATUS)); +- dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", +- RREG32(mmGRBM_STATUS2)); +- dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", +- RREG32(mmGRBM_STATUS_SE0)); +- dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", +- RREG32(mmGRBM_STATUS_SE1)); +- dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", +- RREG32(mmGRBM_STATUS_SE2)); +- dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", +- RREG32(mmGRBM_STATUS_SE3)); +- dev_info(adev->dev, " CP_STAT = 0x%08x\n", 
RREG32(mmCP_STAT)); +- dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", +- RREG32(mmCP_STALLED_STAT1)); +- dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", +- RREG32(mmCP_STALLED_STAT2)); +- dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", +- RREG32(mmCP_STALLED_STAT3)); +- dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", +- RREG32(mmCP_CPF_BUSY_STAT)); +- dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", +- RREG32(mmCP_CPF_STALLED_STAT1)); +- dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); +- dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); +- dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", +- RREG32(mmCP_CPC_STALLED_STAT1)); +- dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); +- +- for (i = 0; i < 32; i++) { +- dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n", +- i, RREG32(mmGB_TILE_MODE0 + (i * 4))); +- } +- for (i = 0; i < 16; i++) { +- dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n", +- i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4))); +- } +- for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { +- dev_info(adev->dev, " se: %d\n", i); +- gfx_v8_0_select_se_sh(adev, i, 0xffffffff); +- dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n", +- RREG32(mmPA_SC_RASTER_CONFIG)); +- dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n", +- RREG32(mmPA_SC_RASTER_CONFIG_1)); +- } +- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- +- dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n", +- RREG32(mmGB_ADDR_CONFIG)); +- dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n", +- RREG32(mmHDP_ADDR_CONFIG)); +- dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", +- RREG32(mmDMIF_ADDR_CALC)); +- +- dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", +- RREG32(mmCP_MEQ_THRESHOLDS)); +- dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n", +- RREG32(mmSX_DEBUG_1)); +- dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n", +- RREG32(mmTA_CNTL_AUX)); +- dev_info(adev->dev, " 
SPI_CONFIG_CNTL=0x%08X\n", +- RREG32(mmSPI_CONFIG_CNTL)); +- dev_info(adev->dev, " SQ_CONFIG=0x%08X\n", +- RREG32(mmSQ_CONFIG)); +- dev_info(adev->dev, " DB_DEBUG=0x%08X\n", +- RREG32(mmDB_DEBUG)); +- dev_info(adev->dev, " DB_DEBUG2=0x%08X\n", +- RREG32(mmDB_DEBUG2)); +- dev_info(adev->dev, " DB_DEBUG3=0x%08X\n", +- RREG32(mmDB_DEBUG3)); +- dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n", +- RREG32(mmCB_HW_CONTROL)); +- dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n", +- RREG32(mmSPI_CONFIG_CNTL_1)); +- dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n", +- RREG32(mmPA_SC_FIFO_SIZE)); +- dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n", +- RREG32(mmVGT_NUM_INSTANCES)); +- dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n", +- RREG32(mmCP_PERFMON_CNTL)); +- dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n", +- RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS)); +- dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n", +- RREG32(mmVGT_CACHE_INVALIDATION)); +- dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n", +- RREG32(mmVGT_GS_VERTEX_REUSE)); +- dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n", +- RREG32(mmPA_SC_LINE_STIPPLE_STATE)); +- dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n", +- RREG32(mmPA_CL_ENHANCE)); +- dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n", +- RREG32(mmPA_SC_ENHANCE)); +- +- dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n", +- RREG32(mmCP_ME_CNTL)); +- dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n", +- RREG32(mmCP_MAX_CONTEXT)); +- dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n", +- RREG32(mmCP_ENDIAN_SWAP)); +- dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n", +- RREG32(mmCP_DEVICE_ID)); +- +- dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n", +- RREG32(mmCP_SEM_WAIT_TIMER)); +- +- dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n", +- RREG32(mmCP_RB_WPTR_DELAY)); +- dev_info(adev->dev, " CP_RB_VMID=0x%08X\n", +- RREG32(mmCP_RB_VMID)); +- dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", +- RREG32(mmCP_RB0_CNTL)); +- dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n", +- 
RREG32(mmCP_RB0_WPTR)); +- dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n", +- RREG32(mmCP_RB0_RPTR_ADDR)); +- dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n", +- RREG32(mmCP_RB0_RPTR_ADDR_HI)); +- dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", +- RREG32(mmCP_RB0_CNTL)); +- dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n", +- RREG32(mmCP_RB0_BASE)); +- dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n", +- RREG32(mmCP_RB0_BASE_HI)); +- dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n", +- RREG32(mmCP_MEC_CNTL)); +- dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n", +- RREG32(mmCP_CPF_DEBUG)); +- +- dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n", +- RREG32(mmSCRATCH_ADDR)); +- dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n", +- RREG32(mmSCRATCH_UMSK)); +- +- dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n", +- RREG32(mmCP_INT_CNTL_RING0)); +- dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", +- RREG32(mmRLC_LB_CNTL)); +- dev_info(adev->dev, " RLC_CNTL=0x%08X\n", +- RREG32(mmRLC_CNTL)); +- dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n", +- RREG32(mmRLC_CGCG_CGLS_CTRL)); +- dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n", +- RREG32(mmRLC_LB_CNTR_INIT)); +- dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n", +- RREG32(mmRLC_LB_CNTR_MAX)); +- dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n", +- RREG32(mmRLC_LB_INIT_CU_MASK)); +- dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n", +- RREG32(mmRLC_LB_PARAMS)); +- dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", +- RREG32(mmRLC_LB_CNTL)); +- dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n", +- RREG32(mmRLC_MC_CNTL)); +- dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n", +- RREG32(mmRLC_UCODE_CNTL)); +- +- mutex_lock(&adev->srbm_mutex); +- for (i = 0; i < 16; i++) { +- vi_srbm_select(adev, 0, 0, 0, i); +- dev_info(adev->dev, " VM %d:\n", i); +- dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n", +- RREG32(mmSH_MEM_CONFIG)); +- dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n", +- RREG32(mmSH_MEM_APE1_BASE)); +- dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n", +- 
RREG32(mmSH_MEM_APE1_LIMIT)); +- dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n", +- RREG32(mmSH_MEM_BASES)); +- } +- vi_srbm_select(adev, 0, 0, 0, 0); +- mutex_unlock(&adev->srbm_mutex); +-} +- + static int gfx_v8_0_soft_reset(void *handle) + { + u32 grbm_soft_reset = 0, srbm_soft_reset = 0; +@@ -4108,7 +5051,6 @@ static int gfx_v8_0_soft_reset(void *handle) + SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); + + if (grbm_soft_reset || srbm_soft_reset) { +- gfx_v8_0_print_status((void *)adev); + /* stop the rlc */ + gfx_v8_0_rlc_stop(adev); + +@@ -4168,7 +5110,6 @@ static int gfx_v8_0_soft_reset(void *handle) + + /* Wait a little for things to settle down */ + udelay(50); +- gfx_v8_0_print_status((void *)adev); + } + return 0; + } +@@ -4250,6 +5191,7 @@ static int gfx_v8_0_early_init(void *handle) + gfx_v8_0_set_ring_funcs(adev); + gfx_v8_0_set_irq_funcs(adev); + gfx_v8_0_set_gds_init(adev); ++ gfx_v8_0_set_rlc_funcs(adev); + + return 0; + } +@@ -4272,17 +5214,181 @@ static int gfx_v8_0_late_init(void *handle) + if (r) + return r; + ++ amdgpu_set_powergating_state(adev, ++ AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); ++ + return 0; + } + ++static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, ++ bool enable) ++{ ++ uint32_t data, temp; ++ ++ if (adev->asic_type == CHIP_POLARIS11) ++ /* Send msg to SMU via Powerplay */ ++ amdgpu_set_powergating_state(adev, ++ AMD_IP_BLOCK_TYPE_SMC, ++ enable ? 
++ AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); ++ ++ if (enable) { ++ /* Enable static MGPG */ ++ temp = data = RREG32(mmRLC_PG_CNTL); ++ data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; ++ ++ if (temp != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ } else { ++ temp = data = RREG32(mmRLC_PG_CNTL); ++ data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; ++ ++ if (temp != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ } ++} ++ ++static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, ++ bool enable) ++{ ++ uint32_t data, temp; ++ ++ if (enable) { ++ /* Enable dynamic MGPG */ ++ temp = data = RREG32(mmRLC_PG_CNTL); ++ data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; ++ ++ if (temp != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ } else { ++ temp = data = RREG32(mmRLC_PG_CNTL); ++ data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; ++ ++ if (temp != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ } ++} ++ ++static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, ++ bool enable) ++{ ++ uint32_t data, temp; ++ ++ if (enable) { ++ /* Enable quick PG */ ++ temp = data = RREG32(mmRLC_PG_CNTL); ++ data |= 0x100000; ++ ++ if (temp != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ } else { ++ temp = data = RREG32(mmRLC_PG_CNTL); ++ data &= ~0x100000; ++ ++ if (temp != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ } ++} ++ ++static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, ++ bool enable) ++{ ++ u32 data, orig; ++ ++ orig = data = RREG32(mmRLC_PG_CNTL); ++ ++ if (enable) ++ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; ++ else ++ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; ++ ++ if (orig != data) ++ WREG32(mmRLC_PG_CNTL, data); ++} ++ ++static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, ++ bool enable) ++{ ++ u32 data, orig; ++ ++ orig = data = RREG32(mmRLC_PG_CNTL); ++ ++ if (enable) ++ data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; ++ else ++ data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; ++ ++ 
if (orig != data) ++ WREG32(mmRLC_PG_CNTL, data); ++ ++ /* Read any GFX register to wake up GFX. */ ++ if (!enable) ++ data = RREG32(mmDB_RENDER_CONTROL); ++} ++ ++static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, ++ bool enable) ++{ ++ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { ++ cz_enable_gfx_cg_power_gating(adev, true); ++ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) ++ cz_enable_gfx_pipeline_power_gating(adev, true); ++ } else { ++ cz_enable_gfx_cg_power_gating(adev, false); ++ cz_enable_gfx_pipeline_power_gating(adev, false); ++ } ++} ++ + static int gfx_v8_0_set_powergating_state(void *handle, + enum amd_powergating_state state) + { ++ struct amdgpu_device *adev = (struct amdgpu_device *)handle; ++ bool enable = (state == AMD_PG_STATE_GATE) ? true : false; ++ ++ if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) ++ return 0; ++ ++ switch (adev->asic_type) { ++ case CHIP_CARRIZO: ++ case CHIP_STONEY: ++ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) ++ cz_update_gfx_cg_power_gating(adev, enable); ++ ++ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) ++ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); ++ else ++ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); ++ ++ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) ++ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); ++ else ++ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); ++ break; ++ case CHIP_POLARIS11: ++ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) ++ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); ++ else ++ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); ++ ++ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) ++ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); ++ else ++ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); ++ ++ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) ++ polaris11_enable_gfx_quick_mg_power_gating(adev, true); ++ else ++ 
polaris11_enable_gfx_quick_mg_power_gating(adev, false); ++ break; ++ default: ++ break; ++ } ++ + return 0; + } + +-static void fiji_send_serdes_cmd(struct amdgpu_device *adev, +- uint32_t reg_addr, uint32_t cmd) ++static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, ++ uint32_t reg_addr, uint32_t cmd) + { + uint32_t data; + +@@ -4292,7 +5398,8 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev, + WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + + data = RREG32(mmRLC_SERDES_WR_CTRL); +- data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | ++ if (adev->asic_type == CHIP_STONEY) ++ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | + RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | + RLC_SERDES_WR_CTRL__P1_SELECT_MASK | + RLC_SERDES_WR_CTRL__P2_SELECT_MASK | +@@ -4300,42 +5407,218 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev, + RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | + RLC_SERDES_WR_CTRL__POWER_UP_MASK | + RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | +- RLC_SERDES_WR_CTRL__BPM_DATA_MASK | +- RLC_SERDES_WR_CTRL__REG_ADDR_MASK | + RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); ++ else ++ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | ++ RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | ++ RLC_SERDES_WR_CTRL__P1_SELECT_MASK | ++ RLC_SERDES_WR_CTRL__P2_SELECT_MASK | ++ RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | ++ RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | ++ RLC_SERDES_WR_CTRL__POWER_UP_MASK | ++ RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | ++ RLC_SERDES_WR_CTRL__BPM_DATA_MASK | ++ RLC_SERDES_WR_CTRL__REG_ADDR_MASK | ++ RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); + data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | +- (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | +- (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | +- (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); ++ (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | ++ (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | ++ (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); + + WREG32(mmRLC_SERDES_WR_CTRL, data); + } + 
+-static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, +- bool enable) ++#define MSG_ENTER_RLC_SAFE_MODE 1 ++#define MSG_EXIT_RLC_SAFE_MODE 0 ++ ++#define RLC_GPR_REG2__REQ_MASK 0x00000001 ++#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 ++#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e ++ ++static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) ++{ ++ u32 data = 0; ++ unsigned i; ++ ++ data = RREG32(mmRLC_CNTL); ++ if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) ++ return; ++ ++ if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || ++ (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | ++ AMD_PG_SUPPORT_GFX_DMG))) { ++ data |= RLC_GPR_REG2__REQ_MASK; ++ data &= ~RLC_GPR_REG2__MESSAGE_MASK; ++ data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); ++ WREG32(mmRLC_GPR_REG2, data); ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ if ((RREG32(mmRLC_GPM_STAT) & ++ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | ++ RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == ++ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | ++ RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ++ break; ++ udelay(1); ++ } ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) ++ break; ++ udelay(1); ++ } ++ adev->gfx.rlc.in_safe_mode = true; ++ } ++} ++ ++static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) ++{ ++ u32 data; ++ unsigned i; ++ ++ data = RREG32(mmRLC_CNTL); ++ if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) ++ return; ++ ++ if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || ++ (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | ++ AMD_PG_SUPPORT_GFX_DMG))) { ++ data |= RLC_GPR_REG2__REQ_MASK; ++ data &= ~RLC_GPR_REG2__MESSAGE_MASK; ++ data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); ++ WREG32(mmRLC_GPR_REG2, data); ++ adev->gfx.rlc.in_safe_mode = false; ++ } ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ 
if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) ++ break; ++ udelay(1); ++ } ++} ++ ++static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) ++{ ++ u32 data; ++ unsigned i; ++ ++ data = RREG32(mmRLC_CNTL); ++ if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) ++ return; ++ ++ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { ++ data |= RLC_SAFE_MODE__CMD_MASK; ++ data &= ~RLC_SAFE_MODE__MESSAGE_MASK; ++ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); ++ WREG32(mmRLC_SAFE_MODE, data); ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ if ((RREG32(mmRLC_GPM_STAT) & ++ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | ++ RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == ++ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | ++ RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ++ break; ++ udelay(1); ++ } ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) ++ break; ++ udelay(1); ++ } ++ adev->gfx.rlc.in_safe_mode = true; ++ } ++} ++ ++static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) ++{ ++ u32 data = 0; ++ unsigned i; ++ ++ data = RREG32(mmRLC_CNTL); ++ if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) ++ return; ++ ++ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { ++ if (adev->gfx.rlc.in_safe_mode) { ++ data |= RLC_SAFE_MODE__CMD_MASK; ++ data &= ~RLC_SAFE_MODE__MESSAGE_MASK; ++ WREG32(mmRLC_SAFE_MODE, data); ++ adev->gfx.rlc.in_safe_mode = false; ++ } ++ } ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) ++ break; ++ udelay(1); ++ } ++} ++ ++static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) ++{ ++ adev->gfx.rlc.in_safe_mode = true; ++} ++ ++static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) ++{ ++ adev->gfx.rlc.in_safe_mode = false; ++} ++ ++static const struct amdgpu_rlc_funcs cz_rlc_funcs = { ++ .enter_safe_mode = cz_enter_rlc_safe_mode, ++ .exit_safe_mode 
= cz_exit_rlc_safe_mode ++}; ++ ++static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { ++ .enter_safe_mode = iceland_enter_rlc_safe_mode, ++ .exit_safe_mode = iceland_exit_rlc_safe_mode ++}; ++ ++static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { ++ .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, ++ .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode ++}; ++ ++static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, ++ bool enable) + { + uint32_t temp, data; + ++ adev->gfx.rlc.funcs->enter_safe_mode(adev); ++ + /* It is disabled by HW by default */ +- if (enable) { +- /* 1 - RLC memory Light sleep */ +- temp = data = RREG32(mmRLC_MEM_SLP_CNTL); +- data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; +- if (temp != data) +- WREG32(mmRLC_MEM_SLP_CNTL, data); ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { ++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { ++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { ++ /* 1 - RLC memory Light sleep */ ++ temp = data = RREG32(mmRLC_MEM_SLP_CNTL); ++ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; ++ if (temp != data) ++ WREG32(mmRLC_MEM_SLP_CNTL, data); ++ } + +- /* 2 - CP memory Light sleep */ +- temp = data = RREG32(mmCP_MEM_SLP_CNTL); +- data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; +- if (temp != data) +- WREG32(mmCP_MEM_SLP_CNTL, data); ++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { ++ /* 2 - CP memory Light sleep */ ++ temp = data = RREG32(mmCP_MEM_SLP_CNTL); ++ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; ++ if (temp != data) ++ WREG32(mmCP_MEM_SLP_CNTL, data); ++ } ++ } + + /* 3 - RLC_CGTT_MGCG_OVERRIDE */ + temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); +- data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | +- RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | +- RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | +- RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); ++ if (adev->flags & AMD_IS_APU) ++ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | ++ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | ++ 
RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); ++ else ++ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | ++ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | ++ RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | ++ RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); + + if (temp != data) + WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); +@@ -4344,19 +5627,23 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, + gfx_v8_0_wait_for_rlc_serdes(adev); + + /* 5 - clear mgcg override */ +- fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); +- +- /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ +- temp = data = RREG32(mmCGTS_SM_CTRL_REG); +- data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); +- data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); +- data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; +- data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; +- data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; +- data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; +- data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); +- if (temp != data) +- WREG32(mmCGTS_SM_CTRL_REG, data); ++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); ++ ++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { ++ /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ ++ temp = data = RREG32(mmCGTS_SM_CTRL_REG); ++ data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); ++ data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); ++ data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; ++ data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; ++ if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && ++ (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) ++ data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; ++ data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; ++ data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); ++ if (temp != data) ++ WREG32(mmCGTS_SM_CTRL_REG, data); ++ } + udelay(50); + + /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ +@@ -4396,23 +5683,27 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, + 
gfx_v8_0_wait_for_rlc_serdes(adev); + + /* 6 - set mgcg override */ +- fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); ++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); + + udelay(50); + + /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ + gfx_v8_0_wait_for_rlc_serdes(adev); + } ++ ++ adev->gfx.rlc.funcs->exit_safe_mode(adev); + } + +-static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, +- bool enable) ++static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, ++ bool enable) + { + uint32_t temp, temp1, data, data1; + + temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); + +- if (enable) { ++ adev->gfx.rlc.funcs->enter_safe_mode(adev); ++ ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ + * Cmp_busy/GFX_Idle interrupts + */ +@@ -4427,25 +5718,29 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + gfx_v8_0_wait_for_rlc_serdes(adev); + + /* 3 - clear cgcg override */ +- fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); ++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); + + /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ + gfx_v8_0_wait_for_rlc_serdes(adev); + + /* 4 - write cmd to set CGLS */ +- fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); ++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); + + /* 5 - enable cgcg */ + data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + +- /* enable cgls*/ +- data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; ++ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { ++ /* enable cgls*/ ++ data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + +- temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); +- data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; ++ temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); ++ data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; + +- if 
(temp1 != data1) +- WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); ++ if (temp1 != data1) ++ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); ++ } else { ++ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; ++ } + + if (temp != data) + WREG32(mmRLC_CGCG_CGLS_CTRL, data); +@@ -4470,36 +5765,38 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + gfx_v8_0_wait_for_rlc_serdes(adev); + + /* write cmd to Set CGCG Overrride */ +- fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); ++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); + + /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ + gfx_v8_0_wait_for_rlc_serdes(adev); + + /* write cmd to Clear CGLS */ +- fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); ++ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); + + /* disable cgcg, cgls should be disabled too. */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | +- RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); ++ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + if (temp != data) + WREG32(mmRLC_CGCG_CGLS_CTRL, data); + } ++ ++ adev->gfx.rlc.funcs->exit_safe_mode(adev); + } +-static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev, +- bool enable) ++static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, ++ bool enable) + { + if (enable) { + /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) + * === MGCG + MGLS + TS(CG/LS) === + */ +- fiji_update_medium_grain_clock_gating(adev, enable); +- fiji_update_coarse_grain_clock_gating(adev, enable); ++ gfx_v8_0_update_medium_grain_clock_gating(adev, enable); ++ gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) + * === CGCG + CGLS === + */ +- fiji_update_coarse_grain_clock_gating(adev, enable); +- fiji_update_medium_grain_clock_gating(adev, enable); ++ gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); ++ 
gfx_v8_0_update_medium_grain_clock_gating(adev, enable); + } + return 0; + } +@@ -4511,8 +5808,10 @@ static int gfx_v8_0_set_clockgating_state(void *handle, + + switch (adev->asic_type) { + case CHIP_FIJI: +- fiji_update_gfx_clock_gating(adev, +- state == AMD_CG_STATE_GATE ? true : false); ++ case CHIP_CARRIZO: ++ case CHIP_STONEY: ++ gfx_v8_0_update_gfx_clock_gating(adev, ++ state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; +@@ -4602,17 +5901,13 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) + } + + static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib) ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { +- bool need_ctx_switch = ring->current_ctx != ib->ctx; + u32 header, control = 0; + u32 next_rptr = ring->wptr + 5; + +- /* drop the CE preamble IB for the same context */ +- if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) +- return; +- +- if (need_ctx_switch) ++ if (ctx_switch) + next_rptr += 2; + + next_rptr += 4; +@@ -4623,7 +5918,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, next_rptr); + + /* insert SWITCH_BUFFER packet before first IB in the ring frame */ +- if (need_ctx_switch) { ++ if (ctx_switch) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } +@@ -4633,7 +5928,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + +- control |= ib->length_dw | (ib->vm_id << 24); ++ control |= ib->length_dw | (vm_id << 24); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +@@ -4646,7 +5941,8 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + } + + static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib) ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { + u32 header, control = 0; + u32 next_rptr = ring->wptr + 5; 
+@@ -4662,7 +5958,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + +- control |= ib->length_dw | (ib->vm_id << 24); ++ control |= ib->length_dw | (vm_id << 24); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +@@ -4684,6 +5980,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | ++ EOP_TC_WB_ACTION_EN | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, addr & 0xfffffffc); +@@ -5022,6 +6319,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, + } + + const struct amd_ip_funcs gfx_v8_0_ip_funcs = { ++ .name = "gfx_v8_0", + .early_init = gfx_v8_0_early_init, + .late_init = gfx_v8_0_late_init, + .sw_init = gfx_v8_0_sw_init, +@@ -5033,7 +6331,6 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = { + .is_idle = gfx_v8_0_is_idle, + .wait_for_idle = gfx_v8_0_wait_for_idle, + .soft_reset = gfx_v8_0_soft_reset, +- .print_status = gfx_v8_0_print_status, + .set_clockgating_state = gfx_v8_0_set_clockgating_state, + .set_powergating_state = gfx_v8_0_set_powergating_state, + }; +@@ -5112,6 +6409,22 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) + adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; + } + ++static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) ++{ ++ switch (adev->asic_type) { ++ case CHIP_TOPAZ: ++ case CHIP_STONEY: ++ adev->gfx.rlc.funcs = &iceland_rlc_funcs; ++ break; ++ case CHIP_CARRIZO: ++ adev->gfx.rlc.funcs = &cz_rlc_funcs; ++ break; ++ default: ++ adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; ++ break; ++ } ++} ++ + static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) + { + /* init asci gds info */ +@@ -5155,14 +6468,11 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) + return (~data) & 
mask; + } + +-int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, +- struct amdgpu_cu_info *cu_info) ++static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) + { + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; +- +- if (!adev || !cu_info) +- return -EINVAL; ++ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + + memset(cu_info, 0, sizeof(*cu_info)); + +@@ -5193,6 +6503,4 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; +- +- return 0; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +index 021e051..16a49f5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +@@ -28,6 +28,5 @@ extern const struct amd_ip_funcs gfx_v8_0_ip_funcs; + + uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev); + void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); +-int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); + + #endif +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +index 29bd7b5..9945d5b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +@@ -43,6 +43,8 @@ static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev); + static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); + + MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_mc.bin"); + + static const u32 golden_settings_tonga_a11[] = + { +@@ -73,6 +75,23 @@ static const u32 fiji_mgcg_cgcg_init[] = + mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 + }; + ++static const u32 golden_settings_polaris11_a11[] = ++{ ++ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, ++ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, ++ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 
0x0fffffff, ++ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff ++}; ++ ++static const u32 golden_settings_polaris10_a11[] = ++{ ++ mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000, ++ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, ++ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, ++ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, ++ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff ++}; ++ + static const u32 cz_mgcg_cgcg_init[] = + { + mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 +@@ -103,6 +122,16 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) + golden_settings_tonga_a11, + (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); + break; ++ case CHIP_POLARIS11: ++ amdgpu_program_register_sequence(adev, ++ golden_settings_polaris11_a11, ++ (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); ++ break; ++ case CHIP_POLARIS10: ++ amdgpu_program_register_sequence(adev, ++ golden_settings_polaris10_a11, ++ (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); ++ break; + case CHIP_CARRIZO: + amdgpu_program_register_sequence(adev, + cz_mgcg_cgcg_init, +@@ -209,6 +238,12 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) + case CHIP_TONGA: + chip_name = "tonga"; + break; ++ case CHIP_POLARIS11: ++ chip_name = "polaris11"; ++ break; ++ case CHIP_POLARIS10: ++ chip_name = "polaris10"; ++ break; + case CHIP_FIJI: + case CHIP_CARRIZO: + case CHIP_STONEY: +@@ -863,14 +898,6 @@ static int gmc_v8_0_early_init(void *handle) + gmc_v8_0_set_gart_funcs(adev); + gmc_v8_0_set_irq_funcs(adev); + +- if (adev->flags & AMD_IS_APU) { +- adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; +- } else { +- u32 tmp = RREG32(mmMC_SEQ_MISC0); +- tmp &= MC_SEQ_MISC0__MT__MASK; +- adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); +- } +- + return 0; + } + +@@ -878,15 +905,33 @@ static int gmc_v8_0_late_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +- return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); 
++ if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) ++ return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); ++ else ++ return 0; + } + ++#define mmMC_SEQ_MISC0_FIJI 0xA71 ++ + static int gmc_v8_0_sw_init(void *handle) + { + int r; + int dma_bits; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + ++ if (adev->flags & AMD_IS_APU) { ++ adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; ++ } else { ++ u32 tmp; ++ ++ if (adev->asic_type == CHIP_FIJI) ++ tmp = RREG32(mmMC_SEQ_MISC0_FIJI); ++ else ++ tmp = RREG32(mmMC_SEQ_MISC0); ++ tmp &= MC_SEQ_MISC0__MT__MASK; ++ adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); ++ } ++ + r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); + if (r) + return r; +@@ -1075,111 +1120,6 @@ static int gmc_v8_0_wait_for_idle(void *handle) + + } + +-static void gmc_v8_0_print_status(void *handle) +-{ +- int i, j; +- struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- +- dev_info(adev->dev, "GMC 8.x registers\n"); +- dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", +- RREG32(mmSRBM_STATUS)); +- dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", +- RREG32(mmSRBM_STATUS2)); +- +- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", +- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", +- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); +- dev_info(adev->dev, " MC_VM_MX_L1_TLB_CNTL=0x%08X\n", +- RREG32(mmMC_VM_MX_L1_TLB_CNTL)); +- dev_info(adev->dev, " VM_L2_CNTL=0x%08X\n", +- RREG32(mmVM_L2_CNTL)); +- dev_info(adev->dev, " VM_L2_CNTL2=0x%08X\n", +- RREG32(mmVM_L2_CNTL2)); +- dev_info(adev->dev, " VM_L2_CNTL3=0x%08X\n", +- RREG32(mmVM_L2_CNTL3)); +- dev_info(adev->dev, " VM_L2_CNTL4=0x%08X\n", +- RREG32(mmVM_L2_CNTL4)); +- dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_START_ADDR=0x%08X\n", +- RREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_END_ADDR=0x%08X\n", +- 
RREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n", +- RREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT0_CNTL2=0x%08X\n", +- RREG32(mmVM_CONTEXT0_CNTL2)); +- dev_info(adev->dev, " VM_CONTEXT0_CNTL=0x%08X\n", +- RREG32(mmVM_CONTEXT0_CNTL)); +- dev_info(adev->dev, " VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR=0x%08X\n", +- RREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR)); +- dev_info(adev->dev, " VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR=0x%08X\n", +- RREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR)); +- dev_info(adev->dev, " mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET=0x%08X\n", +- RREG32(mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET)); +- dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_START_ADDR=0x%08X\n", +- RREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_END_ADDR=0x%08X\n", +- RREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n", +- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR)); +- dev_info(adev->dev, " VM_CONTEXT1_CNTL2=0x%08X\n", +- RREG32(mmVM_CONTEXT1_CNTL2)); +- dev_info(adev->dev, " VM_CONTEXT1_CNTL=0x%08X\n", +- RREG32(mmVM_CONTEXT1_CNTL)); +- for (i = 0; i < 16; i++) { +- if (i < 8) +- dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n", +- i, RREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i)); +- else +- dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n", +- i, RREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8)); +- } +- dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_LOW_ADDR=0x%08X\n", +- RREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR)); +- dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_HIGH_ADDR=0x%08X\n", +- RREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR)); +- dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR=0x%08X\n", +- RREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR)); +- dev_info(adev->dev, " 
MC_VM_FB_LOCATION=0x%08X\n", +- RREG32(mmMC_VM_FB_LOCATION)); +- dev_info(adev->dev, " MC_VM_AGP_BASE=0x%08X\n", +- RREG32(mmMC_VM_AGP_BASE)); +- dev_info(adev->dev, " MC_VM_AGP_TOP=0x%08X\n", +- RREG32(mmMC_VM_AGP_TOP)); +- dev_info(adev->dev, " MC_VM_AGP_BOT=0x%08X\n", +- RREG32(mmMC_VM_AGP_BOT)); +- +- dev_info(adev->dev, " HDP_REG_COHERENCY_FLUSH_CNTL=0x%08X\n", +- RREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL)); +- dev_info(adev->dev, " HDP_NONSURFACE_BASE=0x%08X\n", +- RREG32(mmHDP_NONSURFACE_BASE)); +- dev_info(adev->dev, " HDP_NONSURFACE_INFO=0x%08X\n", +- RREG32(mmHDP_NONSURFACE_INFO)); +- dev_info(adev->dev, " HDP_NONSURFACE_SIZE=0x%08X\n", +- RREG32(mmHDP_NONSURFACE_SIZE)); +- dev_info(adev->dev, " HDP_MISC_CNTL=0x%08X\n", +- RREG32(mmHDP_MISC_CNTL)); +- dev_info(adev->dev, " HDP_HOST_PATH_CNTL=0x%08X\n", +- RREG32(mmHDP_HOST_PATH_CNTL)); +- +- for (i = 0, j = 0; i < 32; i++, j += 0x6) { +- dev_info(adev->dev, " %d:\n", i); +- dev_info(adev->dev, " 0x%04X=0x%08X\n", +- 0xb05 + j, RREG32(0xb05 + j)); +- dev_info(adev->dev, " 0x%04X=0x%08X\n", +- 0xb06 + j, RREG32(0xb06 + j)); +- dev_info(adev->dev, " 0x%04X=0x%08X\n", +- 0xb07 + j, RREG32(0xb07 + j)); +- dev_info(adev->dev, " 0x%04X=0x%08X\n", +- 0xb08 + j, RREG32(0xb08 + j)); +- dev_info(adev->dev, " 0x%04X=0x%08X\n", +- 0xb09 + j, RREG32(0xb09 + j)); +- } +- +- dev_info(adev->dev, " BIF_FB_EN=0x%08X\n", +- RREG32(mmBIF_FB_EN)); +-} +- + static int gmc_v8_0_soft_reset(void *handle) + { + struct amdgpu_mode_mc_save save; +@@ -1199,8 +1139,6 @@ static int gmc_v8_0_soft_reset(void *handle) + } + + if (srbm_soft_reset) { +- gmc_v8_0_print_status((void *)adev); +- + gmc_v8_0_mc_stop(adev, &save); + if (gmc_v8_0_wait_for_idle(adev)) { + dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); +@@ -1224,8 +1162,6 @@ static int gmc_v8_0_soft_reset(void *handle) + + gmc_v8_0_mc_resume(adev, &save); + udelay(50); +- +- gmc_v8_0_print_status((void *)adev); + } + + return 0; +@@ -1303,11 +1239,11 @@ static int 
gmc_v8_0_process_interrupt(struct amdgpu_device *adev, + } + + static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, +- bool enable) ++ bool enable) + { + uint32_t data; + +- if (enable) { ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data = RREG32(mmMC_HUB_MISC_HUB_CG); + data |= MC_HUB_MISC_HUB_CG__ENABLE_MASK; + WREG32(mmMC_HUB_MISC_HUB_CG, data); +@@ -1383,11 +1319,11 @@ static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, + } + + static void fiji_update_mc_light_sleep(struct amdgpu_device *adev, +- bool enable) ++ bool enable) + { + uint32_t data; + +- if (enable) { ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) { + data = RREG32(mmMC_HUB_MISC_HUB_CG); + data |= MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK; + WREG32(mmMC_HUB_MISC_HUB_CG, data); +@@ -1487,6 +1423,7 @@ static int gmc_v8_0_set_powergating_state(void *handle, + } + + const struct amd_ip_funcs gmc_v8_0_ip_funcs = { ++ .name = "gmc_v8_0", + .early_init = gmc_v8_0_early_init, + .late_init = gmc_v8_0_late_init, + .sw_init = gmc_v8_0_sw_init, +@@ -1498,7 +1435,6 @@ const struct amd_ip_funcs gmc_v8_0_ip_funcs = { + .is_idle = gmc_v8_0_is_idle, + .wait_for_idle = gmc_v8_0_wait_for_idle, + .soft_reset = gmc_v8_0_soft_reset, +- .print_status = gmc_v8_0_print_status, + .set_clockgating_state = gmc_v8_0_set_clockgating_state, + .set_powergating_state = gmc_v8_0_set_powergating_state, + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +index 6e0a86a..3c09164 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +@@ -242,9 +242,10 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) + * Schedule an IB in the DMA ring (VI). 
+ */ + static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib) ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { +- u32 vmid = ib->vm_id & 0xf; ++ u32 vmid = vm_id & 0xf; + u32 next_rptr = ring->wptr + 5; + + while ((next_rptr & 7) != 2) +@@ -701,7 +702,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) + ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + +- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); + if (r) + goto err1; + +@@ -990,7 +991,7 @@ static int sdma_v2_4_sw_init(void *handle) + ring->ring_obj = NULL; + ring->use_doorbell = false; + sprintf(ring->name, "sdma%d", i); +- r = amdgpu_ring_init(adev, ring, 256 * 1024, ++ r = amdgpu_ring_init(adev, ring, 1024, + SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, + &adev->sdma.trap_irq, + (i == 0) ? +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +index 833d265..31d99b00 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +@@ -56,6 +56,11 @@ MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin"); + MODULE_FIRMWARE("amdgpu/fiji_sdma.bin"); + MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin"); + MODULE_FIRMWARE("amdgpu/stoney_sdma.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_sdma.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_sdma1.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); ++ + + static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = + { +@@ -101,6 +106,34 @@ static const u32 fiji_mgcg_cgcg_init[] = + mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 + }; + ++static const u32 golden_settings_polaris11_a11[] = ++{ ++ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, ++ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, ++ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA1_CHICKEN_BITS, 
0xfc910007, 0x00810007, ++ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, ++ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100, ++}; ++ ++static const u32 golden_settings_polaris10_a11[] = ++{ ++ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, ++ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, ++ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, ++ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, ++ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100, ++ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100, ++}; ++ + static const u32 cz_golden_settings_a11[] = + { + mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, +@@ -172,6 +205,16 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) + golden_settings_tonga_a11, + (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); + break; ++ case CHIP_POLARIS11: ++ amdgpu_program_register_sequence(adev, ++ golden_settings_polaris11_a11, ++ (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); ++ break; ++ case CHIP_POLARIS10: ++ amdgpu_program_register_sequence(adev, ++ golden_settings_polaris10_a11, ++ (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); ++ break; + case CHIP_CARRIZO: + amdgpu_program_register_sequence(adev, + cz_mgcg_cgcg_init, +@@ -220,6 +263,12 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) + case CHIP_FIJI: + chip_name = "fiji"; + break; ++ case CHIP_POLARIS11: ++ chip_name = "polaris11"; ++ break; ++ case CHIP_POLARIS10: ++ chip_name = "polaris10"; ++ break; + case CHIP_CARRIZO: + chip_name = "carrizo"; + break; +@@ -353,9 +402,10 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) + * Schedule an IB in the DMA ring (VI). 
+ */ + static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib) ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { +- u32 vmid = ib->vm_id & 0xf; ++ u32 vmid = vm_id & 0xf; + u32 next_rptr = ring->wptr + 5; + + while ((next_rptr & 7) != 2) +@@ -878,7 +928,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + +- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); ++ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); + if (r) + goto err1; + +@@ -1176,7 +1226,7 @@ static int sdma_v3_0_sw_init(void *handle) + AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + + sprintf(ring->name, "sdma%d", i); +- r = amdgpu_ring_init(adev, ring, 256 * 1024, ++ r = amdgpu_ring_init(adev, ring, 1024, + SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, + &adev->sdma.trap_irq, + (i == 0) ? +@@ -1267,57 +1317,6 @@ static int sdma_v3_0_wait_for_idle(void *handle) + return -ETIMEDOUT; + } + +-static void sdma_v3_0_print_status(void *handle) +-{ +- int i, j; +- struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- +- dev_info(adev->dev, "VI SDMA registers\n"); +- dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", +- RREG32(mmSRBM_STATUS2)); +- for (i = 0; i < adev->sdma.num_instances; i++) { +- dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", +- i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n", +- i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_CNTL=0x%08X\n", +- i, RREG32(mmSDMA0_CNTL + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n", +- i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_IB_CNTL=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_RB_CNTL=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i])); +- dev_info(adev->dev, 
" SDMA%d_GFX_RB_RPTR=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_RB_WPTR=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_RB_BASE=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", +- i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); +- mutex_lock(&adev->srbm_mutex); +- for (j = 0; j < 16; j++) { +- vi_srbm_select(adev, 0, 0, 0, j); +- dev_info(adev->dev, " VM %d:\n", j); +- dev_info(adev->dev, " SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i])); +- dev_info(adev->dev, " SDMA%d_GFX_APE1_CNTL=0x%08X\n", +- i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i])); +- } +- vi_srbm_select(adev, 0, 0, 0, 0); +- mutex_unlock(&adev->srbm_mutex); +- } +-} +- + static int sdma_v3_0_soft_reset(void *handle) + { + u32 srbm_soft_reset = 0; +@@ -1340,8 +1339,6 @@ static int sdma_v3_0_soft_reset(void *handle) + } + + if (srbm_soft_reset) { +- sdma_v3_0_print_status((void *)adev); +- + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); +@@ -1356,8 +1353,6 @@ static int sdma_v3_0_soft_reset(void *handle) + + /* Wait a little for things to settle down */ + udelay(50); +- +- sdma_v3_0_print_status((void *)adev); + } + + return 0; +@@ -1458,40 +1453,31 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, + return 
0; + } + +-static void fiji_update_sdma_medium_grain_clock_gating( ++static void sdma_v3_0_update_sdma_medium_grain_clock_gating( + struct amdgpu_device *adev, + bool enable) + { + uint32_t temp, data; ++ int i; + +- if (enable) { +- temp = data = RREG32(mmSDMA0_CLK_CTRL); +- data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | +- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); +- if (data != temp) +- WREG32(mmSDMA0_CLK_CTRL, data); +- +- temp = data = RREG32(mmSDMA1_CLK_CTRL); +- data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); +- +- if (data != temp) +- WREG32(mmSDMA1_CLK_CTRL, data); ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { ++ for (i = 0; i < adev->sdma.num_instances; i++) { ++ temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]); ++ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | ++ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); ++ if (data != temp) ++ WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data); ++ } + } else { +- temp = data = RREG32(mmSDMA0_CLK_CTRL); +- data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | ++ for (i = 0; i < adev->sdma.num_instances; i++) { ++ temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]); ++ data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + 
SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | +@@ -1500,54 +1486,35 @@ static void fiji_update_sdma_medium_grain_clock_gating( + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK; + +- if (data != temp) +- WREG32(mmSDMA0_CLK_CTRL, data); +- +- temp = data = RREG32(mmSDMA1_CLK_CTRL); +- data |= SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | +- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK; +- +- if (data != temp) +- WREG32(mmSDMA1_CLK_CTRL, data); ++ if (data != temp) ++ WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data); ++ } + } + } + +-static void fiji_update_sdma_medium_grain_light_sleep( ++static void sdma_v3_0_update_sdma_medium_grain_light_sleep( + struct amdgpu_device *adev, + bool enable) + { + uint32_t temp, data; ++ int i; + +- if (enable) { +- temp = data = RREG32(mmSDMA0_POWER_CNTL); +- data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; +- +- if (temp != data) +- WREG32(mmSDMA0_POWER_CNTL, data); +- +- temp = data = RREG32(mmSDMA1_POWER_CNTL); +- data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { ++ for (i = 0; i < adev->sdma.num_instances; i++) { ++ temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]); ++ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + +- if (temp != data) +- WREG32(mmSDMA1_POWER_CNTL, data); ++ if (temp != data) ++ WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data); ++ } + } else { +- temp = data = RREG32(mmSDMA0_POWER_CNTL); +- data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; +- +- if (temp != data) +- WREG32(mmSDMA0_POWER_CNTL, data); +- +- temp = data = RREG32(mmSDMA1_POWER_CNTL); +- data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; ++ 
for (i = 0; i < adev->sdma.num_instances; i++) { ++ temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]); ++ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + +- if (temp != data) +- WREG32(mmSDMA1_POWER_CNTL, data); ++ if (temp != data) ++ WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data); ++ } + } + } + +@@ -1558,9 +1525,11 @@ static int sdma_v3_0_set_clockgating_state(void *handle, + + switch (adev->asic_type) { + case CHIP_FIJI: +- fiji_update_sdma_medium_grain_clock_gating(adev, ++ case CHIP_CARRIZO: ++ case CHIP_STONEY: ++ sdma_v3_0_update_sdma_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); +- fiji_update_sdma_medium_grain_light_sleep(adev, ++ sdma_v3_0_update_sdma_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: +@@ -1576,6 +1545,7 @@ static int sdma_v3_0_set_powergating_state(void *handle, + } + + const struct amd_ip_funcs sdma_v3_0_ip_funcs = { ++ .name = "sdma_v3_0", + .early_init = sdma_v3_0_early_init, + .late_init = NULL, + .sw_init = sdma_v3_0_sw_init, +@@ -1587,7 +1557,6 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = { + .is_idle = sdma_v3_0_is_idle, + .wait_for_idle = sdma_v3_0_wait_for_idle, + .soft_reset = sdma_v3_0_soft_reset, +- .print_status = sdma_v3_0_print_status, + .set_clockgating_state = sdma_v3_0_set_clockgating_state, + .set_powergating_state = sdma_v3_0_set_powergating_state, + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h b/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h +index c24a81e..880152c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h ++++ b/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h +@@ -44,6 +44,7 @@ + #define UCODE_ID_IH_REG_RESTORE 11 + #define UCODE_ID_VBIOS 12 + #define UCODE_ID_MISC_METADATA 13 ++#define UCODE_ID_SMU_SK 14 + #define UCODE_ID_RLC_SCRATCH 32 + #define UCODE_ID_RLC_SRM_ARAM 33 + #define UCODE_ID_RLC_SRM_DRAM 34 +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +index 3cc301d..20b61d7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +@@ -538,7 +538,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) + * Write ring commands to execute the indirect buffer + */ + static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib) ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +index b90b0ff..4befb62 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +@@ -779,7 +779,8 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) + * Write ring commands to execute the indirect buffer + */ + static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, +- struct amdgpu_ib *ib) ++ struct amdgpu_ib *ib, ++ unsigned vm_id, bool ctx_switch) + { + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); +diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c +index 328707c..0c0c4d1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vi.c ++++ b/drivers/gpu/drm/amd/amdgpu/vi.c +@@ -79,6 +79,11 @@ + #include "amdgpu_dm.h" + #include "amdgpu_powerplay.h" + ++MODULE_FIRMWARE("amdgpu/polaris10_smc.bin"); ++MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_smc.bin"); ++MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin"); ++ + /* + * Indirect registers accessor + */ +@@ -277,6 +282,8 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) + stoney_mgcg_cgcg_init, + (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); + break; ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS10: + default: + break; + } +@@ -538,6 +545,8 @@ static int 
vi_read_register(struct amdgpu_device *adev, u32 se_num, + break; + case CHIP_FIJI: + case CHIP_TONGA: ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS10: + case CHIP_CARRIZO: + case CHIP_STONEY: + asic_register_table = cz_allowed_read_registers; +@@ -908,6 +917,74 @@ static const struct amdgpu_ip_block_version fiji_ip_blocks[] = + }, + }; + ++static const struct amdgpu_ip_block_version polaris11_ip_blocks[] = ++{ ++ /* ORDER MATTERS! */ ++ { ++ .type = AMD_IP_BLOCK_TYPE_COMMON, ++ .major = 2, ++ .minor = 0, ++ .rev = 0, ++ .funcs = &vi_common_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_GMC, ++ .major = 8, ++ .minor = 1, ++ .rev = 0, ++ .funcs = &gmc_v8_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_IH, ++ .major = 3, ++ .minor = 1, ++ .rev = 0, ++ .funcs = &tonga_ih_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_SMC, ++ .major = 7, ++ .minor = 2, ++ .rev = 0, ++ .funcs = &amdgpu_pp_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_DCE, ++ .major = 11, ++ .minor = 2, ++ .rev = 0, ++ .funcs = &dce_v11_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_GFX, ++ .major = 8, ++ .minor = 0, ++ .rev = 0, ++ .funcs = &gfx_v8_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_SDMA, ++ .major = 3, ++ .minor = 1, ++ .rev = 0, ++ .funcs = &sdma_v3_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_UVD, ++ .major = 6, ++ .minor = 3, ++ .rev = 0, ++ .funcs = &uvd_v6_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_VCE, ++ .major = 3, ++ .minor = 4, ++ .rev = 0, ++ .funcs = &vce_v3_0_ip_funcs, ++ }, ++}; ++ + static const struct amdgpu_ip_block_version cz_ip_blocks[] = + { + /* ORDER MATTERS! */ +@@ -1067,6 +1144,75 @@ static const struct amdgpu_ip_block_version cz_ip_blocks_dal[] = + #endif + }; + ++static const struct amdgpu_ip_block_version polaris11_ip_blocks_dal[] = ++{ ++ /* ORDER MATTERS! 
*/ ++ { ++ .type = AMD_IP_BLOCK_TYPE_COMMON, ++ .major = 2, ++ .minor = 0, ++ .rev = 0, ++ .funcs = &vi_common_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_GMC, ++ .major = 8, ++ .minor = 1, ++ .rev = 0, ++ .funcs = &gmc_v8_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_IH, ++ .major = 3, ++ .minor = 1, ++ .rev = 0, ++ .funcs = &tonga_ih_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_SMC, ++ .major = 7, ++ .minor = 2, ++ .rev = 0, ++ /* To Do */ ++ .funcs = &amdgpu_pp_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_DCE, ++ .major = 11, ++ .minor = 2, ++ .rev = 0, ++ .funcs = &amdgpu_dm_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_GFX, ++ .major = 8, ++ .minor = 0, ++ .rev = 0, ++ .funcs = &gfx_v8_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_SDMA, ++ .major = 3, ++ .minor = 1, ++ .rev = 0, ++ .funcs = &sdma_v3_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_UVD, ++ .major = 6, ++ .minor = 3, ++ .rev = 0, ++ .funcs = &uvd_v6_0_ip_funcs, ++ }, ++ { ++ .type = AMD_IP_BLOCK_TYPE_VCE, ++ .major = 3, ++ .minor = 4, ++ .rev = 0, ++ .funcs = &vce_v3_0_ip_funcs, ++ }, ++}; ++ + static const struct amdgpu_ip_block_version tonga_ip_blocks_dal[] = + { + /* ORDER MATTERS! 
*/ +@@ -1213,7 +1359,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) + break; + case CHIP_FIJI: + #if defined(CONFIG_DRM_AMD_DAL) +- if (amdgpu_dal && amdgpu_device_has_dal_support(adev)) { ++ if (amdgpu_device_has_dal_support(adev)) { + adev->ip_blocks = fiji_ip_blocks_dal; + adev->num_ip_blocks = ARRAY_SIZE(fiji_ip_blocks_dal); + } else { +@@ -1227,7 +1373,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) + break; + case CHIP_TONGA: + #if defined(CONFIG_DRM_AMD_DAL) +- if (amdgpu_dal && amdgpu_device_has_dal_support(adev)) { ++ if (amdgpu_device_has_dal_support(adev)) { + adev->ip_blocks = tonga_ip_blocks_dal; + adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks_dal); + } else { +@@ -1239,10 +1385,25 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) + adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks); + #endif + break; ++ case CHIP_POLARIS11: ++ case CHIP_POLARIS10: ++#if defined(CONFIG_DRM_AMD_DAL) ++ if (amdgpu_device_has_dal_support(adev)) { ++ adev->ip_blocks = polaris11_ip_blocks_dal; ++ adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks_dal); ++ } else { ++ adev->ip_blocks = polaris11_ip_blocks; ++ adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks); ++ } ++#else ++ adev->ip_blocks = polaris11_ip_blocks; ++ adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks); ++#endif ++ break; + case CHIP_CARRIZO: + case CHIP_STONEY: + #if defined(CONFIG_DRM_AMD_DAL) +- if (amdgpu_dal && amdgpu_device_has_dal_support(adev)) { ++ if (amdgpu_device_has_dal_support(adev)) { + adev->ip_blocks = cz_ip_blocks_dal; + adev->num_ip_blocks = ARRAY_SIZE(cz_ip_blocks_dal); + } else { +@@ -1286,7 +1447,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = + .get_xclk = &vi_get_xclk, + .set_uvd_clocks = &vi_set_uvd_clocks, + .set_vce_clocks = &vi_set_vce_clocks, +- .get_cu_info = &gfx_v8_0_get_cu_info, + /* these should be moved to their own ip modules */ + .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, + .wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle, 
+@@ -1326,18 +1486,76 @@ static int vi_common_early_init(void *handle) + adev->external_rev_id = 0x1; + break; + case CHIP_FIJI: +- adev->cg_flags = 0; ++ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | ++ AMD_CG_SUPPORT_GFX_MGLS | ++ AMD_CG_SUPPORT_GFX_RLC_LS | ++ AMD_CG_SUPPORT_GFX_CP_LS | ++ AMD_CG_SUPPORT_GFX_CGTS | ++ AMD_CG_SUPPORT_GFX_CGTS_LS | ++ AMD_CG_SUPPORT_GFX_CGCG | ++ AMD_CG_SUPPORT_GFX_CGLS | ++ AMD_CG_SUPPORT_SDMA_MGCG | ++ AMD_CG_SUPPORT_SDMA_LS | ++ AMD_CG_SUPPORT_BIF_LS | ++ AMD_CG_SUPPORT_HDP_MGCG | ++ AMD_CG_SUPPORT_HDP_LS | ++ AMD_CG_SUPPORT_ROM_MGCG | ++ AMD_CG_SUPPORT_MC_MGCG | ++ AMD_CG_SUPPORT_MC_LS; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x3c; + break; + case CHIP_TONGA: +- adev->cg_flags = 0; ++ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x14; + break; ++ case CHIP_POLARIS11: ++ adev->cg_flags = 0; ++ adev->pg_flags = 0; ++ adev->external_rev_id = adev->rev_id + 0x5A; ++ break; ++ case CHIP_POLARIS10: ++ adev->cg_flags = 0; ++ adev->pg_flags = 0; ++ adev->external_rev_id = adev->rev_id + 0x50; ++ break; + case CHIP_CARRIZO: ++ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | ++ AMD_CG_SUPPORT_GFX_MGCG | ++ AMD_CG_SUPPORT_GFX_MGLS | ++ AMD_CG_SUPPORT_GFX_RLC_LS | ++ AMD_CG_SUPPORT_GFX_CP_LS | ++ AMD_CG_SUPPORT_GFX_CGTS | ++ AMD_CG_SUPPORT_GFX_MGLS | ++ AMD_CG_SUPPORT_GFX_CGTS_LS | ++ AMD_CG_SUPPORT_GFX_CGCG | ++ AMD_CG_SUPPORT_GFX_CGLS | ++ AMD_CG_SUPPORT_BIF_LS | ++ AMD_CG_SUPPORT_HDP_MGCG | ++ AMD_CG_SUPPORT_HDP_LS | ++ AMD_CG_SUPPORT_SDMA_MGCG | ++ AMD_CG_SUPPORT_SDMA_LS; ++ /* rev0 hardware doesn't support PG */ ++ adev->pg_flags = 0; ++ if (adev->rev_id != 0x00) ++ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | ++ AMD_PG_SUPPORT_GFX_SMG | ++ AMD_PG_SUPPORT_GFX_DMG | ++ AMD_PG_SUPPORT_CP | ++ AMD_PG_SUPPORT_RLC_SMU_HS | ++ AMD_PG_SUPPORT_GFX_PIPELINE; ++ adev->external_rev_id = adev->rev_id + 0x1; ++ break; + case CHIP_STONEY: +- adev->cg_flags = 0; ++ 
adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | ++ AMD_CG_SUPPORT_GFX_MGCG | ++ AMD_CG_SUPPORT_GFX_MGLS | ++ AMD_CG_SUPPORT_BIF_LS | ++ AMD_CG_SUPPORT_HDP_MGCG | ++ AMD_CG_SUPPORT_HDP_LS | ++ AMD_CG_SUPPORT_SDMA_MGCG | ++ AMD_CG_SUPPORT_SDMA_LS; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x1; + break; +@@ -1414,24 +1632,19 @@ static int vi_common_wait_for_idle(void *handle) + return 0; + } + +-static void vi_common_print_status(void *handle) +-{ +- return; +-} +- + static int vi_common_soft_reset(void *handle) + { + return 0; + } + +-static void fiji_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev, +- bool enable) ++static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev, ++ bool enable) + { + uint32_t temp, data; + + temp = data = RREG32_PCIE(ixPCIE_CNTL2); + +- if (enable) ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK; +@@ -1444,14 +1657,14 @@ static void fiji_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev, + WREG32_PCIE(ixPCIE_CNTL2, data); + } + +-static void fiji_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev, +- bool enable) ++static void vi_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev, ++ bool enable) + { + uint32_t temp, data; + + temp = data = RREG32(mmHDP_HOST_PATH_CNTL); + +- if (enable) ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + data &= ~HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK; + else + data |= HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK; +@@ -1460,14 +1673,14 @@ static void fiji_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev + WREG32(mmHDP_HOST_PATH_CNTL, data); + } + +-static void fiji_update_hdp_light_sleep(struct amdgpu_device *adev, +- bool enable) ++static void vi_update_hdp_light_sleep(struct amdgpu_device *adev, ++ bool enable) + { + uint32_t temp, data; + + temp = data = 
RREG32(mmHDP_MEM_POWER_LS); + +- if (enable) ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; + else + data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; +@@ -1476,14 +1689,14 @@ static void fiji_update_hdp_light_sleep(struct amdgpu_device *adev, + WREG32(mmHDP_MEM_POWER_LS, data); + } + +-static void fiji_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, +- bool enable) ++static void vi_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, ++ bool enable) + { + uint32_t temp, data; + + temp = data = RREG32_SMC(ixCGTT_ROM_CLK_CTRL0); + +- if (enable) ++ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); + else +@@ -1495,19 +1708,28 @@ static void fiji_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev + } + + static int vi_common_set_clockgating_state(void *handle, +- enum amd_clockgating_state state) ++ enum amd_clockgating_state state) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_FIJI: +- fiji_update_bif_medium_grain_light_sleep(adev, ++ vi_update_bif_medium_grain_light_sleep(adev, ++ state == AMD_CG_STATE_GATE ? true : false); ++ vi_update_hdp_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); +- fiji_update_hdp_medium_grain_clock_gating(adev, ++ vi_update_hdp_light_sleep(adev, ++ state == AMD_CG_STATE_GATE ? true : false); ++ vi_update_rom_medium_grain_clock_gating(adev, ++ state == AMD_CG_STATE_GATE ? true : false); ++ break; ++ case CHIP_CARRIZO: ++ case CHIP_STONEY: ++ vi_update_bif_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); +- fiji_update_hdp_light_sleep(adev, ++ vi_update_hdp_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? 
true : false); +- fiji_update_rom_medium_grain_clock_gating(adev, ++ vi_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: +@@ -1523,6 +1745,7 @@ static int vi_common_set_powergating_state(void *handle, + } + + const struct amd_ip_funcs vi_common_ip_funcs = { ++ .name = "vi_common", + .early_init = vi_common_early_init, + .late_init = NULL, + .sw_init = vi_common_sw_init, +@@ -1534,7 +1757,6 @@ const struct amd_ip_funcs vi_common_ip_funcs = { + .is_idle = vi_common_is_idle, + .wait_for_idle = vi_common_wait_for_idle, + .soft_reset = vi_common_soft_reset, +- .print_status = vi_common_print_status, + .set_clockgating_state = vi_common_set_clockgating_state, + .set_powergating_state = vi_common_set_powergating_state, + }; +diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h +index 04e4090..45fde50 100644 +--- a/drivers/gpu/drm/amd/include/amd_shared.h ++++ b/drivers/gpu/drm/amd/include/amd_shared.h +@@ -48,6 +48,8 @@ enum amd_asic_type { + CHIP_FIJI, + CHIP_CARRIZO, + CHIP_STONEY, ++ CHIP_POLARIS10, ++ CHIP_POLARIS11, + CHIP_LAST, + }; + +@@ -104,6 +106,7 @@ enum amd_powergating_state { + #define AMD_CG_SUPPORT_VCE_MGCG (1 << 14) + #define AMD_CG_SUPPORT_HDP_LS (1 << 15) + #define AMD_CG_SUPPORT_HDP_MGCG (1 << 16) ++#define AMD_CG_SUPPORT_ROM_MGCG (1 << 17) + + /* PG flags */ + #define AMD_PG_SUPPORT_GFX_PG (1 << 0) +@@ -117,6 +120,8 @@ enum amd_powergating_state { + #define AMD_PG_SUPPORT_SDMA (1 << 8) + #define AMD_PG_SUPPORT_ACP (1 << 9) + #define AMD_PG_SUPPORT_SAMU (1 << 10) ++#define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11) ++#define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12) + + enum amd_pm_state_type { + /* not used for dpm */ +@@ -140,6 +145,8 @@ enum amd_pm_state_type { + }; + + struct amd_ip_funcs { ++ /* Name of IP block */ ++ char *name; + /* sets up early driver state (pre sw_init), does not configure hw - Optional */ + int (*early_init)(void *handle); + /* sets up late 
driver/hw state (post hw_init) - Optional */ +diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h +index a9b6923..ebaf67b 100644 +--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h ++++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h +@@ -1391,6 +1391,8 @@ + #define mmRLC_CGTT_MGCG_OVERRIDE 0xec48 + #define mmRLC_CGCG_CGLS_CTRL 0xec49 + #define mmRLC_CGCG_RAMP_CTRL 0xec4a ++#define mmRLC_CGCG_CGLS_CTRL_3D 0xec9d ++#define mmRLC_CGCG_RAMP_CTRL_3D 0xec9e + #define mmRLC_DYN_PG_STATUS 0xec4b + #define mmRLC_DYN_PG_REQUEST 0xec4c + #define mmRLC_PG_DELAY 0xec4d +diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h +index eaf451e..32f3e34 100644 +--- a/drivers/gpu/drm/amd/include/atombios.h ++++ b/drivers/gpu/drm/amd/include/atombios.h +@@ -79,9 +79,23 @@ + #define ATOM_PPLL0 2 + #define ATOM_PPLL3 3 + ++#define ATOM_PHY_PLL0 4 ++#define ATOM_PHY_PLL1 5 ++ + #define ATOM_EXT_PLL1 8 ++#define ATOM_GCK_DFS 8 + #define ATOM_EXT_PLL2 9 ++#define ATOM_FCH_CLK 9 + #define ATOM_EXT_CLOCK 10 ++#define ATOM_DP_DTO 11 ++ ++#define ATOM_COMBOPHY_PLL0 20 ++#define ATOM_COMBOPHY_PLL1 21 ++#define ATOM_COMBOPHY_PLL2 22 ++#define ATOM_COMBOPHY_PLL3 23 ++#define ATOM_COMBOPHY_PLL4 24 ++#define ATOM_COMBOPHY_PLL5 25 ++ + #define ATOM_PPLL_INVALID 0xFF + + #define ENCODER_REFCLK_SRC_P1PLL 0 +@@ -224,6 +238,31 @@ typedef struct _ATOM_ROM_HEADER + UCHAR ucReserved; + }ATOM_ROM_HEADER; + ++ ++typedef struct _ATOM_ROM_HEADER_V2_1 ++{ ++ ATOM_COMMON_TABLE_HEADER sHeader; ++ UCHAR uaFirmWareSignature[4]; //Signature to distinguish between Atombios and non-atombios, ++ //atombios should init it as "ATOM", don't change the position ++ USHORT usBiosRuntimeSegmentAddress; ++ USHORT usProtectedModeInfoOffset; ++ USHORT usConfigFilenameOffset; ++ USHORT usCRC_BlockOffset; ++ USHORT usBIOS_BootupMessageOffset; ++ USHORT usInt10Offset; ++ USHORT usPciBusDevInitCode; ++ USHORT 
usIoBaseAddress; ++ USHORT usSubsystemVendorID; ++ USHORT usSubsystemID; ++ USHORT usPCI_InfoOffset; ++ USHORT usMasterCommandTableOffset;//Offest for SW to get all command table offsets, Don't change the position ++ USHORT usMasterDataTableOffset; //Offest for SW to get all data table offsets, Don't change the position ++ UCHAR ucExtendedFunctionCode; ++ UCHAR ucReserved; ++ ULONG ulPSPDirTableOffset; ++}ATOM_ROM_HEADER_V2_1; ++ ++ + //==============================Command Table Portion==================================== + + +@@ -272,12 +311,12 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ + USHORT GetSCLKOverMCLKRatio; //Atomic Table, only used by Bios + USHORT SetCRTC_Timing; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT SetCRTC_OverScan; //Atomic Table, used by various SW components,latest version 1.1 +- USHORT SetCRTC_Replication; //Atomic Table, used only by Bios ++ USHORT GetSMUClockInfo; //Atomic Table, used only by Bios + USHORT SelectCRTC_Source; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT EnableGraphSurfaces; //Atomic Table, used only by Bios + USHORT UpdateCRTC_DoubleBufferRegisters; //Atomic Table, used only by Bios + USHORT LUT_AutoFill; //Atomic Table, only used by Bios +- USHORT EnableHW_IconCursor; //Atomic Table, only used by Bios ++ USHORT SetDCEClock; //Atomic Table, start from DCE11.1, shared by driver and VBIOS, change DISPCLK and DPREFCLK + USHORT GetMemoryClock; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT GetEngineClock; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT SetCRTC_UsingDTDTiming; //Atomic Table, directly used by various SW components,latest version 1.1 +@@ -292,7 +331,7 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ + USHORT PowerConnectorDetection; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT MC_Synchronization; //Atomic 
Table, indirectly used by various SW components,called from SetMemoryClock + USHORT ComputeMemoryEnginePLL; //Atomic Table, indirectly used by various SW components,called from SetMemory/EngineClock +- USHORT MemoryRefreshConversion; //Atomic Table, indirectly used by various SW components,called from SetMemory or SetEngineClock ++ USHORT Gfx_Init; //Atomic Table, indirectly used by various SW components,called from SetMemory or SetEngineClock + USHORT VRAM_GetCurrentInfoBlock; //Atomic Table, used only by Bios + USHORT DynamicMemorySettings; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock + USHORT MemoryTraining; //Atomic Table, used only by Bios +@@ -333,6 +372,10 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ + #define LCD1OutputControl HW_Misc_Operation + #define TV1OutputControl Gfx_Harvesting + #define TVEncoderControl SMC_Init ++#define EnableHW_IconCursor SetDCEClock ++#define SetCRTC_Replication GetSMUClockInfo ++ ++#define MemoryRefreshConversion Gfx_Init + + typedef struct _ATOM_MASTER_COMMAND_TABLE + { +@@ -425,6 +468,9 @@ typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 + #define b3FIRST_TIME_CHANGE_CLOCK 0x08 //Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup + #define b3SKIP_SW_PROGRAM_PLL 0x10 //Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL + #define b3DRAM_SELF_REFRESH_EXIT 0x20 //Applicable to DRAM self refresh exit only. when set, it means it will go to program DRAM self refresh exit path ++#define b3SRIOV_INIT_BOOT 0x40 //Use by HV GPU driver only, to load uCode. for ASIC_InitTable SCLK parameter only ++#define b3SRIOV_LOAD_UCODE 0x40 //Use by HV GPU driver only, to load uCode. for ASIC_InitTable SCLK parameter only ++#define b3SRIOV_SKIP_ASIC_INIT 0x02 //Use by HV GPU driver only, skip ASIC_Init for primary adapter boot. 
for ASIC_InitTable SCLK parameter only + + typedef struct _ATOM_COMPUTE_CLOCK_FREQ + { +@@ -518,6 +564,33 @@ typedef struct _COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 + //ucPllCntlFlag + #define SPLL_CNTL_FLAG_VCO_MODE_MASK 0x03 + ++typedef struct _COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_7 ++{ ++ ATOM_COMPUTE_CLOCK_FREQ ulClock; //Input Parameter ++ ULONG ulReserved[5]; ++}COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_7; ++ ++//ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag ++#define COMPUTE_GPUCLK_INPUT_FLAG_CLK_TYPE_MASK 0x0f ++#define COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK 0x00 ++#define COMPUTE_GPUCLK_INPUT_FLAG_SCLK 0x01 ++ ++typedef struct _COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 ++{ ++ COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 ulClock; //Output Parameter: ucPostDiv=DFS divider ++ USHORT usSclk_fcw_frac; //fractional divider of fcw = usSclk_fcw_frac/65536 ++ USHORT usSclk_fcw_int; //integer divider of fcwc ++ UCHAR ucSclkPostDiv; //PLL post divider = 2^ucSclkPostDiv ++ UCHAR ucSclkVcoMode; //0: 4G~8Ghz, 1:3G~6Ghz,3: 2G~4Ghz, 2:Reserved ++ UCHAR ucSclkPllRange; //GreenTable SCLK PLL range entry index ( 0~7 ) ++ UCHAR ucSscEnable; ++ USHORT usSsc_fcw1_frac; //fcw1_frac when SSC enable ++ USHORT usSsc_fcw1_int; //fcw1_int when SSC enable ++ USHORT usReserved; ++ USHORT usPcc_fcw_int; ++ USHORT usSsc_fcw_slew_frac; //fcw_slew_frac when SSC enable ++ USHORT usPcc_fcw_slew_frac; ++}COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7; + + // ucInputFlag + #define ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN 1 // 1-StrobeMode, 0-PerformanceMode +@@ -557,12 +630,16 @@ typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 + ULONG ulReserved; + }COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2; + ++//Input parameter of DynamicMemorySettingsTable ++//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag = COMPUTE_MEMORY_PLL_PARAM + typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER + { + ATOM_COMPUTE_CLOCK_FREQ ulClock; + ULONG ulReserved[2]; + }DYNAMICE_MEMORY_SETTINGS_PARAMETER; + ++//Input parameter of 
DynamicMemorySettingsTable ++//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag == COMPUTE_ENGINE_PLL_PARAM + typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER + { + ATOM_COMPUTE_CLOCK_FREQ ulClock; +@@ -570,6 +647,29 @@ typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER + ULONG ulReserved; + }DYNAMICE_ENGINE_SETTINGS_PARAMETER; + ++//Input parameter of DynamicMemorySettingsTable ver2.1 and above ++//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag == ADJUST_MC_SETTING_PARAM ++typedef struct _DYNAMICE_MC_DPM_SETTINGS_PARAMETER ++{ ++ ATOM_COMPUTE_CLOCK_FREQ ulClock; ++ UCHAR ucMclkDPMState; ++ UCHAR ucReserved[3]; ++ ULONG ulReserved; ++}DYNAMICE_MC_DPM_SETTINGS_PARAMETER; ++ ++//ucMclkDPMState ++#define DYNAMIC_MC_DPM_SETTING_LOW_DPM_STATE 0 ++#define DYNAMIC_MC_DPM_SETTING_MEDIUM_DPM_STATE 1 ++#define DYNAMIC_MC_DPM_SETTING_HIGH_DPM_STATE 2 ++ ++typedef union _DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 ++{ ++ DYNAMICE_MEMORY_SETTINGS_PARAMETER asMCReg; ++ DYNAMICE_ENGINE_SETTINGS_PARAMETER asMCArbReg; ++ DYNAMICE_MC_DPM_SETTINGS_PARAMETER asDPMMCReg; ++}DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1; ++ ++ + /****************************************************************************/ + // Structures used by SetEngineClockTable + /****************************************************************************/ +@@ -584,6 +684,13 @@ typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION + COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved; + }SET_ENGINE_CLOCK_PS_ALLOCATION; + ++typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION_V1_2 ++{ ++ ULONG ulTargetEngineClock; //In 10Khz unit ++ COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_7 sReserved; ++}SET_ENGINE_CLOCK_PS_ALLOCATION_V1_2; ++ ++ + /****************************************************************************/ + // Structures used by SetMemoryClockTable + /****************************************************************************/ +@@ -827,6 +934,12 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V2 + #define 
ATOM_ENCODER_CMD_SETUP 0x0f + #define ATOM_ENCODER_CMD_SETUP_PANEL_MODE 0x10 + ++// New Command for DIGxEncoderControlTable v1.5 ++#define ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4 0x14 ++#define ATOM_ENCODER_CMD_STREAM_SETUP 0x0F //change name ATOM_ENCODER_CMD_SETUP ++#define ATOM_ENCODER_CMD_LINK_SETUP 0x11 //internal use, called by other Command Table ++#define ATOM_ENCODER_CMD_ENCODER_BLANK 0x12 //internal use, called by other Command Table ++ + // ucStatus + #define ATOM_ENCODER_STATUS_LINK_TRAINING_COMPLETE 0x10 + #define ATOM_ENCODER_STATUS_LINK_TRAINING_INCOMPLETE 0x00 +@@ -955,6 +1068,69 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4 + #define DP_PANEL_MODE_INTERNAL_DP2_MODE 0x01 + #define DP_PANEL_MODE_INTERNAL_DP1_MODE 0x11 + ++ ++typedef struct _ENCODER_STREAM_SETUP_PARAMETERS_V5 ++{ ++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6 ++ UCHAR ucAction; // = ATOM_ENOCODER_CMD_STREAM_SETUP ++ UCHAR ucDigMode; // ATOM_ENCODER_MODE_DP/ATOM_ENCODER_MODE_DVI/ATOM_ENCODER_MODE_HDMI ++ UCHAR ucLaneNum; // Lane number ++ ULONG ulPixelClock; // Pixel Clock in 10Khz ++ UCHAR ucBitPerColor; ++ UCHAR ucLinkRateIn270Mhz;//= DP link rate/270Mhz, =6: 1.62G = 10: 2.7G, =20: 5.4Ghz, =30: 8.1Ghz etc ++ UCHAR ucReserved[2]; ++}ENCODER_STREAM_SETUP_PARAMETERS_V5; ++ ++typedef struct _ENCODER_LINK_SETUP_PARAMETERS_V5 ++{ ++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6 ++ UCHAR ucAction; // = ATOM_ENOCODER_CMD_LINK_SETUP ++ UCHAR ucDigMode; // ATOM_ENCODER_MODE_DP/ATOM_ENCODER_MODE_DVI/ATOM_ENCODER_MODE_HDMI ++ UCHAR ucLaneNum; // Lane number ++ ULONG ulSymClock; // Symbol Clock in 10Khz ++ UCHAR ucHPDSel; ++ UCHAR ucDigEncoderSel; // DIG stream( front-end ) selection, bit0 means DIG0 FE is enable, ++ UCHAR ucReserved[2]; ++}ENCODER_LINK_SETUP_PARAMETERS_V5; ++ ++typedef struct _DP_PANEL_MODE_SETUP_PARAMETERS_V5 ++{ ++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6 ++ UCHAR ucAction; // = ATOM_ENCODER_CMD_DPLINK_SETUP ++ UCHAR ucPanelMode; // =0: external DP ++ // =0x1: internal DP2 ++ // 
=0x11: internal DP1 NutMeg/Travis DP Translator ++ UCHAR ucReserved; ++ ULONG ulReserved[2]; ++}DP_PANEL_MODE_SETUP_PARAMETERS_V5; ++ ++typedef struct _ENCODER_GENERIC_CMD_PARAMETERS_V5 ++{ ++ UCHAR ucDigId; // 0~6 map to DIG0~DIG6 ++ UCHAR ucAction; // = rest of generic encoder command which does not carry any parameters ++ UCHAR ucReserved[2]; ++ ULONG ulReserved[2]; ++}ENCODER_GENERIC_CMD_PARAMETERS_V5; ++ ++//ucDigId ++#define ATOM_ENCODER_CONFIG_V5_DIG0_ENCODER 0x00 ++#define ATOM_ENCODER_CONFIG_V5_DIG1_ENCODER 0x01 ++#define ATOM_ENCODER_CONFIG_V5_DIG2_ENCODER 0x02 ++#define ATOM_ENCODER_CONFIG_V5_DIG3_ENCODER 0x03 ++#define ATOM_ENCODER_CONFIG_V5_DIG4_ENCODER 0x04 ++#define ATOM_ENCODER_CONFIG_V5_DIG5_ENCODER 0x05 ++#define ATOM_ENCODER_CONFIG_V5_DIG6_ENCODER 0x06 ++ ++ ++typedef union _DIG_ENCODER_CONTROL_PARAMETERS_V5 ++{ ++ ENCODER_GENERIC_CMD_PARAMETERS_V5 asCmdParam; ++ ENCODER_STREAM_SETUP_PARAMETERS_V5 asStreamParam; ++ ENCODER_LINK_SETUP_PARAMETERS_V5 asLinkParam; ++ DP_PANEL_MODE_SETUP_PARAMETERS_V5 asDPPanelModeParam; ++}DIG_ENCODER_CONTROL_PARAMETERS_V5; ++ ++ + /****************************************************************************/ + // Structures used by UNIPHYTransmitterControlTable + // LVTMATransmitterControlTable +@@ -1371,6 +1547,49 @@ typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 + + #define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION_V1_5 DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 + ++typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6 ++{ ++ UCHAR ucPhyId; // 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4= UNIPHYE 5=UNIPHYF ++ UCHAR ucAction; // define as ATOM_TRANSMITER_ACTION_xxx ++ union ++ { ++ UCHAR ucDigMode; // ATOM_ENCODER_MODE_DP/ATOM_ENCODER_MODE_DVI/ATOM_ENCODER_MODE_HDMI ++ UCHAR ucDPLaneSet; // DP voltage swing and pre-emphasis value defined in DPCD DP_LANE_SET, "DP_LANE_SET__xDB_y_zV" ++ }; ++ UCHAR ucLaneNum; // Lane number ++ ULONG ulSymClock; // Symbol Clock in 10Khz ++ UCHAR ucHPDSel; // =1: HPD1, =2: HPD2, 
.... =6: HPD6, =0: HPD is not assigned ++ UCHAR ucDigEncoderSel; // DIG stream( front-end ) selection, bit0 means DIG0 FE is enable, ++ UCHAR ucConnObjId; // Connector Object Id defined in ObjectId.h ++ UCHAR ucReserved; ++ ULONG ulReserved; ++}DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6; ++ ++ ++// ucDigEncoderSel ++#define ATOM_TRANMSITTER_V6__DIGA_SEL 0x01 ++#define ATOM_TRANMSITTER_V6__DIGB_SEL 0x02 ++#define ATOM_TRANMSITTER_V6__DIGC_SEL 0x04 ++#define ATOM_TRANMSITTER_V6__DIGD_SEL 0x08 ++#define ATOM_TRANMSITTER_V6__DIGE_SEL 0x10 ++#define ATOM_TRANMSITTER_V6__DIGF_SEL 0x20 ++#define ATOM_TRANMSITTER_V6__DIGG_SEL 0x40 ++ ++// ucDigMode ++#define ATOM_TRANSMITTER_DIGMODE_V6_DP 0 ++#define ATOM_TRANSMITTER_DIGMODE_V6_DVI 2 ++#define ATOM_TRANSMITTER_DIGMODE_V6_HDMI 3 ++#define ATOM_TRANSMITTER_DIGMODE_V6_DP_MST 5 ++ ++//ucHPDSel ++#define ATOM_TRANSMITTER_V6_NO_HPD_SEL 0x00 ++#define ATOM_TRANSMITTER_V6_HPD1_SEL 0x01 ++#define ATOM_TRANSMITTER_V6_HPD2_SEL 0x02 ++#define ATOM_TRANSMITTER_V6_HPD3_SEL 0x03 ++#define ATOM_TRANSMITTER_V6_HPD4_SEL 0x04 ++#define ATOM_TRANSMITTER_V6_HPD5_SEL 0x05 ++#define ATOM_TRANSMITTER_V6_HPD6_SEL 0x06 ++ + + /****************************************************************************/ + // Structures used by ExternalEncoderControlTable V1.3 +@@ -1784,6 +2003,101 @@ typedef struct _GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V3 + PIXEL_CLOCK_PARAMETERS_V5 sDispClkInput; + }GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V3; + ++typedef struct _PIXEL_CLOCK_PARAMETERS_V7 ++{ ++ ULONG ulPixelClock; // target the pixel clock to drive the CRTC timing in unit of 100Hz. ++ ++ UCHAR ucPpll; // ATOM_PHY_PLL0/ATOM_PHY_PLL1/ATOM_PPLL0 ++ UCHAR ucTransmitterID; // ASIC encoder id defined in objectId.h, ++ // indicate which graphic encoder will be used. 
++ UCHAR ucEncoderMode; // Encoder mode: ++ UCHAR ucMiscInfo; // bit[0]= Force program PLL for pixclk ++ // bit[1]= Force program PHY PLL only ( internally used by VBIOS only in DP case which PHYPLL is programmed for SYMCLK, not Pixclk ) ++ // bit[5:4]= RefClock source for PPLL. ++ // =0: XTLAIN( default mode ) ++ // =1: pcie ++ // =2: GENLK ++ UCHAR ucCRTC; // ATOM_CRTC1~6, indicate the CRTC controller to ++ UCHAR ucDeepColorRatio; // HDMI panel bit depth: =0: 24bpp =1:30bpp, =2:36bpp ++ UCHAR ucReserved[2]; ++ ULONG ulReserved; ++}PIXEL_CLOCK_PARAMETERS_V7; ++ ++//ucMiscInfo ++#define PIXEL_CLOCK_V7_MISC_FORCE_PROG_PPLL 0x01 ++#define PIXEL_CLOCK_V7_MISC_PROG_PHYPLL 0x02 ++#define PIXEL_CLOCK_V7_MISC_YUV420_MODE 0x04 ++#define PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN 0x08 ++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC 0x30 ++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC_XTALIN 0x00 ++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC_PCIE 0x10 ++#define PIXEL_CLOCK_V7_MISC_REF_DIV_SRC_GENLK 0x20 ++ ++//ucDeepColorRatio ++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS 0x00 //00 - DCCG_DEEP_COLOR_DTO_DISABLE: Disable Deep Color DTO ++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4 0x01 //01 - DCCG_DEEP_COLOR_DTO_5_4_RATIO: Set Deep Color DTO to 5:4 ++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2 0x02 //02 - DCCG_DEEP_COLOR_DTO_3_2_RATIO: Set Deep Color DTO to 3:2 ++#define PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1 0x03 //03 - DCCG_DEEP_COLOR_DTO_2_1_RATIO: Set Deep Color DTO to 2:1 ++ ++// SetDCEClockTable input parameter for DCE11.1 ++typedef struct _SET_DCE_CLOCK_PARAMETERS_V1_1 ++{ ++ ULONG ulDISPClkFreq; // target DISPCLK frquency in unit of 10kHz, return real DISPCLK frequency. when ucFlag[1]=1, in unit of 100Hz. 
++ UCHAR ucFlag; // bit0=1: DPREFCLK bypass DFS bit0=0: DPREFCLK not bypass DFS ++ UCHAR ucCrtc; // use when enable DCCG pixel clock ucFlag[1]=1 ++ UCHAR ucPpllId; // use when enable DCCG pixel clock ucFlag[1]=1 ++ UCHAR ucDeepColorRatio; // use when enable DCCG pixel clock ucFlag[1]=1 ++}SET_DCE_CLOCK_PARAMETERS_V1_1; ++ ++ ++typedef struct _SET_DCE_CLOCK_PS_ALLOCATION_V1_1 ++{ ++ SET_DCE_CLOCK_PARAMETERS_V1_1 asParam; ++ ULONG ulReserved[2]; ++}SET_DCE_CLOCK_PS_ALLOCATION_V1_1; ++ ++//SET_DCE_CLOCK_PARAMETERS_V1_1.ucFlag ++#define SET_DCE_CLOCK_FLAG_GEN_DPREFCLK 0x01 ++#define SET_DCE_CLOCK_FLAG_DPREFCLK_BYPASS 0x01 ++#define SET_DCE_CLOCK_FLAG_ENABLE_PIXCLK 0x02 ++ ++// SetDCEClockTable input parameter for DCE11.2( POLARIS10 and POLARIS11 ) and above ++typedef struct _SET_DCE_CLOCK_PARAMETERS_V2_1 ++{ ++ ULONG ulDCEClkFreq; // target DCE frequency in unit of 10KHZ, return real DISPCLK/DPREFCLK frequency. ++ UCHAR ucDCEClkType; // =0: DISPCLK =1: DPREFCLK =2: PIXCLK ++ UCHAR ucDCEClkSrc; // ATOM_PLL0 or ATOM_GCK_DFS or ATOM_FCH_CLK or ATOM_COMBOPHY_PLLx ++ UCHAR ucDCEClkFlag; // Bit [1:0] = PPLL ref clock source ( when ucDCEClkSrc= ATOM_PPLL0 ) ++ UCHAR ucCRTC; // ucDisp Pipe Id, ATOM_CRTC0/1/2/..., use only when ucDCEClkType = PIXCLK ++}SET_DCE_CLOCK_PARAMETERS_V2_1; ++ ++//ucDCEClkType ++#define DCE_CLOCK_TYPE_DISPCLK 0 ++#define DCE_CLOCK_TYPE_DPREFCLK 1 ++#define DCE_CLOCK_TYPE_PIXELCLK 2 // used by VBIOS internally, called by SetPixelClockTable ++ ++//ucDCEClkFlag when ucDCEClkType == DPREFCLK ++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_MASK 0x03 ++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_GENERICA 0x00 ++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_GENLK 0x01 ++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_PCIE 0x02 ++#define DCE_CLOCK_FLAG_PLL_REFCLK_SRC_XTALIN 0x03 ++ ++//ucDCEClkFlag when ucDCEClkType == PIXCLK ++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_MASK 0x03 ++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_DIS 0x00 //00 - DCCG_DEEP_COLOR_DTO_DISABLE: Disable Deep Color 
DTO ++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_5_4 0x01 //01 - DCCG_DEEP_COLOR_DTO_5_4_RATIO: Set Deep Color DTO to 5:4 ++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_3_2 0x02 //02 - DCCG_DEEP_COLOR_DTO_3_2_RATIO: Set Deep Color DTO to 3:2 ++#define DCE_CLOCK_FLAG_PCLK_DEEPCOLOR_RATIO_2_1 0x03 //03 - DCCG_DEEP_COLOR_DTO_2_1_RATIO: Set Deep Color DTO to 2:1 ++#define DCE_CLOCK_FLAG_PIXCLK_YUV420_MODE 0x04 ++ ++typedef struct _SET_DCE_CLOCK_PS_ALLOCATION_V2_1 ++{ ++ SET_DCE_CLOCK_PARAMETERS_V2_1 asParam; ++ ULONG ulReserved[2]; ++}SET_DCE_CLOCK_PS_ALLOCATION_V2_1; ++ ++ + + /****************************************************************************/ + // Structures used by AdjustDisplayPllTable +@@ -2300,6 +2614,11 @@ typedef struct _SET_VOLTAGE_PARAMETERS_V1_3 + #define VOLTAGE_TYPE_VDDCI 4 + #define VOLTAGE_TYPE_VDDGFX 5 + #define VOLTAGE_TYPE_PCC 6 ++#define VOLTAGE_TYPE_MVPP 7 ++#define VOLTAGE_TYPE_LEDDPM 8 ++#define VOLTAGE_TYPE_PCC_MVDD 9 ++#define VOLTAGE_TYPE_PCIE_VDDC 10 ++#define VOLTAGE_TYPE_PCIE_VDDR 11 + + #define VOLTAGE_TYPE_GENERIC_I2C_1 0x11 + #define VOLTAGE_TYPE_GENERIC_I2C_2 0x12 +@@ -2396,6 +2715,39 @@ typedef struct _GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 + USHORT usTDP_Power; // TDP_Current in unit of 0.1W + }GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2; + ++ ++// New Added from CI Hawaii for GetVoltageInfoTable, input parameter structure ++typedef struct _GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3 ++{ ++ UCHAR ucVoltageType; // Input: To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI ++ UCHAR ucVoltageMode; // Input: Indicate action: Get voltage info ++ USHORT usVoltageLevel; // Input: real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. 
) or Leakage Id ++ ULONG ulSCLKFreq; // Input: when ucVoltageMode= ATOM_GET_VOLTAGE_EVV_VOLTAGE, DPM state SCLK frequency, Define in PPTable SCLK/Voltage dependence table ++ ULONG ulReserved[3]; ++}GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3; ++ ++// New Added from CI Hawaii for EVV feature ++typedef struct _GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 ++{ ++ ULONG ulVoltageLevel; // real voltage level in unit of 0.01mv ++ ULONG ulReserved[4]; ++}GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3; ++ ++ ++/****************************************************************************/ ++// Structures used by GetSMUClockInfo ++/****************************************************************************/ ++typedef struct _GET_SMU_CLOCK_INFO_INPUT_PARAMETER_V2_1 ++{ ++ ULONG ulDfsPllOutputFreq:24; ++ ULONG ucDfsDivider:8; ++}GET_SMU_CLOCK_INFO_INPUT_PARAMETER_V2_1; ++ ++typedef struct _GET_SMU_CLOCK_INFO_OUTPUT_PARAMETER_V2_1 ++{ ++ ULONG ulDfsOutputFreq; ++}GET_SMU_CLOCK_INFO_OUTPUT_PARAMETER_V2_1; ++ + /****************************************************************************/ + // Structures used by TVEncoderControlTable + /****************************************************************************/ +@@ -2429,13 +2781,13 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES + USHORT PaletteData; // Only used by BIOS + USHORT LCD_Info; // Shared by various SW components,latest version 1.3, was called LVDS_Info + USHORT DIGTransmitterInfo; // Internal used by VBIOS only version 3.1 +- USHORT AnalogTV_Info; // Shared by various SW components,latest version 1.1 ++ USHORT SMU_Info; // Shared by various SW components,latest version 1.1 + USHORT SupportedDevicesInfo; // Will be obsolete from R600 + USHORT GPIO_I2C_Info; // Shared by various SW components,latest version 1.2 will be used from R600 + USHORT VRAM_UsageByFirmware; // Shared by various SW components,latest version 1.3 will be used from R600 + USHORT GPIO_Pin_LUT; // Shared by various SW components,latest version 1.1 + USHORT 
VESA_ToInternalModeLUT; // Only used by Bios +- USHORT ComponentVideoInfo; // Shared by various SW components,latest version 2.1 will be used from R600 ++ USHORT GFX_Info; // Shared by various SW components,latest version 2.1 will be used from R600 + USHORT PowerPlayInfo; // Shared by various SW components,latest version 2.1,new design from R600 + USHORT GPUVirtualizationInfo; // Will be obsolete from R600 + USHORT SaveRestoreInfo; // Only used by Bios +@@ -2455,7 +2807,7 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES + USHORT ASIC_ProfilingInfo; // New table name from R600, used to be called "ASIC_VDDCI_Info" for pre-R600 + USHORT VoltageObjectInfo; // Shared by various SW components, latest version 1.1 + USHORT PowerSourceInfo; // Shared by various SW components, latest versoin 1.1 +- USHORT ServiceInfo; ++ USHORT ServiceInfo; + }ATOM_MASTER_LIST_OF_DATA_TABLES; + + typedef struct _ATOM_MASTER_DATA_TABLE +@@ -2469,6 +2821,8 @@ typedef struct _ATOM_MASTER_DATA_TABLE + #define DAC_Info PaletteData + #define TMDS_Info DIGTransmitterInfo + #define CompassionateData GPUVirtualizationInfo ++#define AnalogTV_Info SMU_Info ++#define ComponentVideoInfo GFX_Info + + /****************************************************************************/ + // Structure used in MultimediaCapabilityInfoTable +@@ -4278,10 +4632,15 @@ typedef struct _EXT_DISPLAY_PATH + #define MAX_NUMBER_OF_EXT_DISPLAY_PATH 7 + + //usCaps +-#define EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE 0x01 +-#define EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN 0x02 +-#define EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 0x04 +-#define EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT 0x08 ++#define EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE 0x0001 ++#define EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN 0x0002 ++#define EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK 0x007C ++#define EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 (0x01 << 2 ) //PI redriver chip ++#define EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT (0x02 << 2 ) //TI retimer chip ++#define 
EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 (0x03 << 2 ) //Parade DP->HDMI recoverter chip ++ ++ ++ + + typedef struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO + { +@@ -4325,10 +4684,10 @@ typedef struct _ATOM_COMMON_RECORD_HEADER + #define ATOM_CONNECTOR_REMOTE_CAP_RECORD_TYPE 19 + #define ATOM_ENCODER_CAP_RECORD_TYPE 20 + #define ATOM_BRACKET_LAYOUT_RECORD_TYPE 21 +- ++#define ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD_TYPE 22 + + //Must be updated when new record type is added,equal to that record definition! +-#define ATOM_MAX_OBJECT_RECORD_NUMBER ATOM_ENCODER_CAP_RECORD_TYPE ++#define ATOM_MAX_OBJECT_RECORD_NUMBER ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD_TYPE + + typedef struct _ATOM_I2C_RECORD + { +@@ -4458,10 +4817,12 @@ typedef struct _ATOM_ENCODER_DVO_CF_RECORD + UCHAR ucPadding[2]; + }ATOM_ENCODER_DVO_CF_RECORD; + +-// Bit maps for ATOM_ENCODER_CAP_RECORD.ucEncoderCap +-#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by HW encoder ++// Bit maps for ATOM_ENCODER_CAP_RECORD.usEncoderCap ++#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by HW encoder, it is retired in NI. the real meaning from SI is MST_EN ++#define ATOM_ENCODER_CAP_RECORD_MST_EN 0x01 // from SI, this bit means DP MST is enable or not. + #define ATOM_ENCODER_CAP_RECORD_HBR2_EN 0x02 // DP1.2 HBR2 setting is qualified and HBR2 can be enabled + #define ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN 0x04 // HDMI2.0 6Gbps enable or not. ++#define ATOM_ENCODER_CAP_RECORD_HBR3_EN 0x08 // DP1.3 HBR3 is supported by board. 
+ + typedef struct _ATOM_ENCODER_CAP_RECORD + { +@@ -4482,6 +4843,31 @@ typedef struct _ATOM_ENCODER_CAP_RECORD + }; + }ATOM_ENCODER_CAP_RECORD; + ++// Used after SI ++typedef struct _ATOM_ENCODER_CAP_RECORD_V2 ++{ ++ ATOM_COMMON_RECORD_HEADER sheader; ++ union { ++ USHORT usEncoderCap; ++ struct { ++#if ATOM_BIG_ENDIAN ++ USHORT usReserved:12; // Bit4-15 may be defined for other capability in future ++ USHORT usHBR3En:1; // bit3 is for DP1.3 HBR3 enable ++ USHORT usHDMI6GEn:1; // Bit2 is for HDMI6Gbps enable, this bit is used starting from CZ( APU) Ellemere (dGPU) ++ USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable ++ USHORT usMSTEn:1; // Bit0 is for DP1.2 MST enable ++#else ++ USHORT usMSTEn:1; // Bit0 is for DP1.2 MST enable ++ USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable ++ USHORT usHDMI6GEn:1; // Bit2 is for HDMI6Gbps enable, this bit is used starting from CZ( APU) Ellemere (dGPU) ++ USHORT usHBR3En:1; // bit3 is for DP1.3 HBR3 enable ++ USHORT usReserved:12; // Bit4-15 may be defined for other capability in future ++#endif ++ }; ++ }; ++}ATOM_ENCODER_CAP_RECORD_V2; ++ ++ + // value for ATOM_CONNECTOR_CF_RECORD.ucConnectedDvoBundle + #define ATOM_CONNECTOR_CF_RECORD_CONNECTED_UPPER12BITBUNDLEA 1 + #define ATOM_CONNECTOR_CF_RECORD_CONNECTED_LOWER12BITBUNDLEB 2 +@@ -4554,6 +4940,16 @@ typedef struct _ATOM_CONNECTOR_REMOTE_CAP_RECORD + USHORT usReserved; + }ATOM_CONNECTOR_REMOTE_CAP_RECORD; + ++ ++typedef struct _ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD ++{ ++ ATOM_COMMON_RECORD_HEADER sheader; ++ // override TMDS capability on this connector when it operate in TMDS mode. 
usMaxTmdsClkRate = max TMDS Clock in Mhz/2.5 ++ UCHAR ucMaxTmdsClkRateIn2_5Mhz; ++ UCHAR ucReserved; ++} ATOM_CONNECTOR_FORCED_TMDS_CAP_RECORD; ++ ++ + typedef struct _ATOM_CONNECTOR_LAYOUT_INFO + { + USHORT usConnectorObjectId; +@@ -4657,12 +5053,12 @@ typedef struct _ATOM_VOLTAGE_CONTROL + #define VOLTAGE_CONTROL_ID_UP1801 0x0C + #define VOLTAGE_CONTROL_ID_ST6788A 0x0D + #define VOLTAGE_CONTROL_ID_CHLIR3564SVI2 0x0E +-#define VOLTAGE_CONTROL_ID_AD527x 0x0F +-#define VOLTAGE_CONTROL_ID_NCP81022 0x10 +-#define VOLTAGE_CONTROL_ID_LTC2635 0x11 +-#define VOLTAGE_CONTROL_ID_NCP4208 0x12 ++#define VOLTAGE_CONTROL_ID_AD527x 0x0F ++#define VOLTAGE_CONTROL_ID_NCP81022 0x10 ++#define VOLTAGE_CONTROL_ID_LTC2635 0x11 ++#define VOLTAGE_CONTROL_ID_NCP4208 0x12 + #define VOLTAGE_CONTROL_ID_IR35xx 0x13 +-#define VOLTAGE_CONTROL_ID_RT9403 0x14 ++#define VOLTAGE_CONTROL_ID_RT9403 0x14 + + #define VOLTAGE_CONTROL_ID_GENERIC_I2C 0x40 + +@@ -4784,11 +5180,38 @@ typedef struct _ATOM_SVID2_VOLTAGE_OBJECT_V3 + ULONG ulReserved; + }ATOM_SVID2_VOLTAGE_OBJECT_V3; + ++ ++ ++typedef struct _ATOM_MERGED_VOLTAGE_OBJECT_V3 ++{ ++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; // voltage mode = VOLTAGE_OBJ_MERGED_POWER ++ UCHAR ucMergedVType; // VDDC/VDCCI/.... 
++ UCHAR ucReserved[3]; ++}ATOM_MERGED_VOLTAGE_OBJECT_V3; ++ ++ ++typedef struct _ATOM_EVV_DPM_INFO ++{ ++ ULONG ulDPMSclk; // DPM state SCLK ++ USHORT usVAdjOffset; // Adjust Voltage offset in unit of mv ++ UCHAR ucDPMTblVIndex; // Voltage Index in SMC_DPM_Table structure VddcTable/VddGfxTable ++ UCHAR ucDPMState; // DPMState0~7 ++} ATOM_EVV_DPM_INFO; ++ ++// ucVoltageMode = VOLTAGE_OBJ_EVV ++typedef struct _ATOM_EVV_VOLTAGE_OBJECT_V3 ++{ ++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; // voltage mode = VOLTAGE_OBJ_SVID2 ++ ATOM_EVV_DPM_INFO asEvvDpmList[8]; ++}ATOM_EVV_VOLTAGE_OBJECT_V3; ++ ++ + typedef union _ATOM_VOLTAGE_OBJECT_V3{ + ATOM_GPIO_VOLTAGE_OBJECT_V3 asGpioVoltageObj; + ATOM_I2C_VOLTAGE_OBJECT_V3 asI2cVoltageObj; + ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 asLeakageObj; + ATOM_SVID2_VOLTAGE_OBJECT_V3 asSVID2Obj; ++ ATOM_EVV_VOLTAGE_OBJECT_V3 asEvvObj; + }ATOM_VOLTAGE_OBJECT_V3; + + typedef struct _ATOM_VOLTAGE_OBJECT_INFO_V3_1 +@@ -4963,7 +5386,11 @@ typedef struct _ATOM_ASIC_PROFILING_INFO_V3_3 + ULONG ulLkgEncodeMax; + ULONG ulLkgEncodeMin; + ULONG ulEfuseLogisticAlpha; ++ ++ union{ + USHORT usPowerDpm0; ++ USHORT usParamNegFlag; //bit0 =1 :indicate ulRoBeta is Negative, bit1=1 indicate Kv_m max is postive ++ }; + USHORT usPowerDpm1; + USHORT usPowerDpm2; + USHORT usPowerDpm3; +@@ -5067,6 +5494,86 @@ typedef struct _ATOM_ASIC_PROFILING_INFO_V3_4 + ULONG ulReserved[8]; // Reserved for future ASIC + }ATOM_ASIC_PROFILING_INFO_V3_4; + ++// for Polaris10/Polaris11 speed EVV algorithm ++typedef struct _ATOM_ASIC_PROFILING_INFO_V3_5 ++{ ++ ATOM_COMMON_TABLE_HEADER asHeader; ++ ULONG ulMaxVddc; //Maximum voltage for all parts, in unit of 0.01mv ++ ULONG ulMinVddc; //Minimum voltage for all parts, in unit of 0.01mv ++ USHORT usLkgEuseIndex; //Efuse Lkg_FT address ( BYTE address ) ++ UCHAR ucLkgEfuseBitLSB; //Efuse Lkg_FT bit shift in 32bit DWORD ++ UCHAR ucLkgEfuseLength; //Efuse Lkg_FT length ++ ULONG ulLkgEncodeLn_MaxDivMin; //value of ln(Max_Lkg_Ft/Min_Lkg_Ft ) in unit 
of 0.00001 ( unit=100000 ) ++ ULONG ulLkgEncodeMax; //Maximum Lkg_Ft measured value ( or efuse decode value ), in unit of 0.00001 ( unit=100000 ) ++ ULONG ulLkgEncodeMin; //Minimum Lkg_Ft measured value ( or efuse decode value ), in unit of 0.00001 ( unit=100000 ) ++ EFUSE_LINEAR_FUNC_PARAM sRoFuse;//Efuse RO info: DWORD address, bit shift, length, max/min measure value. in unit of 1. ++ ULONG ulEvvDefaultVddc; //def="EVV_DEFAULT_VDDC" descr="return default VDDC(v) when Efuse not cut" unit="100000"/> ++ ULONG ulEvvNoCalcVddc; //def="EVV_NOCALC_VDDC" descr="return VDDC(v) when Calculation is bad" unit="100000"/> ++ ULONG ulSpeed_Model; //def="EVV_SPEED_MODEL" descr="0 = Greek model, 1 = multivariate model" unit="1"/> ++ ULONG ulSM_A0; //def="EVV_SM_A0" descr="Leakage coeff(Multivariant Mode)." unit="100000"/> ++ ULONG ulSM_A1; //def="EVV_SM_A1" descr="Leakage/SCLK coeff(Multivariant Mode)." unit="1000000"/> ++ ULONG ulSM_A2; //def="EVV_SM_A2" descr="Alpha( Greek Mode ) or VDDC/SCLK coeff(Multivariant Mode)." unit="100000"/> ++ ULONG ulSM_A3; //def="EVV_SM_A3" descr="Beta( Greek Mode ) or SCLK coeff(Multivariant Mode)." unit="100000"/> ++ ULONG ulSM_A4; //def="EVV_SM_A4" descr="VDDC^2/SCLK coeff(Multivariant Mode)." unit="100000"/> ++ ULONG ulSM_A5; //def="EVV_SM_A5" descr="VDDC^2 coeff(Multivariant Mode)." unit="100000"/> ++ ULONG ulSM_A6; //def="EVV_SM_A6" descr="Gamma( Greek Mode ) or VDDC coeff(Multivariant Mode)." unit="100000"/> ++ ULONG ulSM_A7; //def="EVV_SM_A7" descr="Epsilon( Greek Mode ) or constant(Multivariant Mode)." unit="100000"/> ++ UCHAR ucSM_A0_sign; //def="EVV_SM_A0_SIGN" descr="=0 SM_A0 is postive. =1: SM_A0 is negative" unit="1"/> ++ UCHAR ucSM_A1_sign; //def="EVV_SM_A1_SIGN" descr="=0 SM_A1 is postive. =1: SM_A1 is negative" unit="1"/> ++ UCHAR ucSM_A2_sign; //def="EVV_SM_A2_SIGN" descr="=0 SM_A2 is postive. =1: SM_A2 is negative" unit="1"/> ++ UCHAR ucSM_A3_sign; //def="EVV_SM_A3_SIGN" descr="=0 SM_A3 is postive. 
=1: SM_A3 is negative" unit="1"/> ++ UCHAR ucSM_A4_sign; //def="EVV_SM_A4_SIGN" descr="=0 SM_A4 is postive. =1: SM_A4 is negative" unit="1"/> ++ UCHAR ucSM_A5_sign; //def="EVV_SM_A5_SIGN" descr="=0 SM_A5 is postive. =1: SM_A5 is negative" unit="1"/> ++ UCHAR ucSM_A6_sign; //def="EVV_SM_A6_SIGN" descr="=0 SM_A6 is postive. =1: SM_A6 is negative" unit="1"/> ++ UCHAR ucSM_A7_sign; //def="EVV_SM_A7_SIGN" descr="=0 SM_A7 is postive. =1: SM_A7 is negative" unit="1"/> ++ ULONG ulMargin_RO_a; //def="EVV_MARGIN_RO_A" descr="A Term to represent RO equation in Ax2+Bx+C, unit=1" ++ ULONG ulMargin_RO_b; //def="EVV_MARGIN_RO_B" descr="B Term to represent RO equation in Ax2+Bx+C, unit=1" ++ ULONG ulMargin_RO_c; //def="EVV_MARGIN_RO_C" descr="C Term to represent RO equation in Ax2+Bx+C, unit=1" ++ ULONG ulMargin_fixed; //def="EVV_MARGIN_FIXED" descr="Fixed MHz to add to SCLK margin, unit=1" unit="1"/> ++ ULONG ulMargin_Fmax_mean; //def="EVV_MARGIN_FMAX_MEAN" descr="Percentage to add for Fmas mean margin unit=10000" unit="10000"/> ++ ULONG ulMargin_plat_mean; //def="EVV_MARGIN_PLAT_MEAN" descr="Percentage to add for platform mean margin unit=10000" unit="10000"/> ++ ULONG ulMargin_Fmax_sigma; //def="EVV_MARGIN_FMAX_SIGMA" descr="Percentage to add for Fmax sigma margin unit=10000" unit="10000"/> ++ ULONG ulMargin_plat_sigma; //def="EVV_MARGIN_PLAT_SIGMA" descr="Percentage to add for platform sigma margin unit=10000" unit="10000"/> ++ ULONG ulMargin_DC_sigma; //def="EVV_MARGIN_DC_SIGMA" descr="Regulator DC tolerance margin (mV) unit=100" unit="100"/> ++ ULONG ulReserved[12]; ++}ATOM_ASIC_PROFILING_INFO_V3_5; ++ ++ ++typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{ ++ ULONG ulMaxSclkFreq; ++ UCHAR ucVco_setting; // 1: 3-6GHz, 3: 2-4GHz ++ UCHAR ucPostdiv; // divide by 2^n ++ USHORT ucFcw_pcc; ++ USHORT ucFcw_trans_upper; ++ USHORT ucRcw_trans_lower; ++}ATOM_SCLK_FCW_RANGE_ENTRY_V1; ++ ++ ++// SMU_InfoTable for Polaris10/Polaris11 ++typedef struct _ATOM_SMU_INFO_V2_1 ++{ ++ 
ATOM_COMMON_TABLE_HEADER asHeader; ++ UCHAR ucSclkEntryNum; // for potential future extend, indicate the number of ATOM_SCLK_FCW_RANGE_ENTRY_V1 ++ UCHAR ucReserved[3]; ++ ATOM_SCLK_FCW_RANGE_ENTRY_V1 asSclkFcwRangeEntry[8]; ++}ATOM_SMU_INFO_V2_1; ++ ++ ++// GFX_InfoTable for Polaris10/Polaris11 ++typedef struct _ATOM_GFX_INFO_V2_1 ++{ ++ ATOM_COMMON_TABLE_HEADER asHeader; ++ UCHAR GfxIpMinVer; ++ UCHAR GfxIpMajVer; ++ UCHAR max_shader_engines; ++ UCHAR max_tile_pipes; ++ UCHAR max_cu_per_sh; ++ UCHAR max_sh_per_se; ++ UCHAR max_backends_per_se; ++ UCHAR max_texture_channel_caches; ++}ATOM_GFX_INFO_V2_1; ++ ++ + typedef struct _ATOM_POWER_SOURCE_OBJECT + { + UCHAR ucPwrSrcId; // Power source +@@ -5765,14 +6272,6 @@ sExtDispConnInfo: Display connector information table provided t + + **********************************************************************************************************************/ + +-// this Table is used for Kaveri/Kabini APU +-typedef struct _ATOM_FUSION_SYSTEM_INFO_V2 +-{ +- ATOM_INTEGRATED_SYSTEM_INFO_V1_8 sIntegratedSysInfo; // refer to ATOM_INTEGRATED_SYSTEM_INFO_V1_8 definition +- ULONG ulPowerplayTable[128]; // Update comments here to link new powerplay table definition structure +-}ATOM_FUSION_SYSTEM_INFO_V2; +- +- + typedef struct _ATOM_I2C_REG_INFO + { + UCHAR ucI2cRegIndex; +@@ -5859,7 +6358,50 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_9 + #define EDP_VS_VARIABLE_PREM_MODE 5 + + +-// this IntegrateSystemInfoTable is used for Carrizo ++// ulGPUCapInfo ++#define SYS_INFO_V1_9_GPUCAPSINFO_DISABLE_AUX_MODE_DETECT 0x08 ++#define SYS_INFO_V1_9_GPUCAPSINFO_ENABEL_DFS_BYPASS 0x10 ++//ulGPUCapInfo[16]=1 indicate SMC firmware is able to support GNB fast resume function, so that driver can call SMC to program most of GNB register during resuming, from ML ++#define SYS_INFO_V1_9_GPUCAPSINFO_GNB_FAST_RESUME_CAPABLE 0x00010000 ++//ulGPUCapInfo[18]=1 indicate the IOMMU is not available ++#define SYS_INFO_V1_9_GPUCAPINFO_IOMMU_DISABLE 
0x00040000 ++//ulGPUCapInfo[19]=1 indicate the MARC Aperture is opened. ++#define SYS_INFO_V1_9_GPUCAPINFO_MARC_APERTURE_ENABLE 0x00080000 ++ ++ ++typedef struct _DPHY_TIMING_PARA ++{ ++ UCHAR ucProfileID; // SENSOR_PROFILES ++ ULONG ucPara; ++} DPHY_TIMING_PARA; ++ ++typedef struct _DPHY_ELEC_PARA ++{ ++ USHORT usPara[3]; ++} DPHY_ELEC_PARA; ++ ++typedef struct _CAMERA_MODULE_INFO ++{ ++ UCHAR ucID; // 0: Rear, 1: Front right of user, 2: Front left of user ++ UCHAR strModuleName[8]; ++ DPHY_TIMING_PARA asTimingPara[6]; // Exact number is under estimation and confirmation from sensor vendor ++} CAMERA_MODULE_INFO; ++ ++typedef struct _FLASHLIGHT_INFO ++{ ++ UCHAR ucID; // 0: Rear, 1: Front ++ UCHAR strName[8]; ++} FLASHLIGHT_INFO; ++ ++typedef struct _CAMERA_DATA ++{ ++ ULONG ulVersionCode; ++ CAMERA_MODULE_INFO asCameraInfo[3]; // Assuming 3 camera sensors max ++ FLASHLIGHT_INFO asFlashInfo; // Assuming 1 flashlight max ++ DPHY_ELEC_PARA asDphyElecPara; ++ ULONG ulCrcVal; // CRC ++}CAMERA_DATA; ++ + typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_10 + { + ATOM_COMMON_TABLE_HEADER sHeader; +@@ -5883,7 +6425,7 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_10 + USHORT usPanelRefreshRateRange; + UCHAR ucMemoryType; + UCHAR ucUMAChannelNumber; +- UCHAR strVBIOSMsg[40]; ++ ULONG ulMsgReserved[10]; + ATOM_TDP_CONFIG asTdpConfig; + ULONG ulReserved[7]; + ATOM_CLK_VOLT_CAPABILITY_V2 sDispClkVoltageMapping[8]; +@@ -5925,8 +6467,27 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_10 + UCHAR ucEDPv1_4VSMode; + UCHAR ucReserved2; + ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo; ++ CAMERA_DATA asCameraInfo; ++ ULONG ulReserved8[29]; + }ATOM_INTEGRATED_SYSTEM_INFO_V1_10; + ++ ++// this Table is used for Kaveri/Kabini APU ++typedef struct _ATOM_FUSION_SYSTEM_INFO_V2 ++{ ++ ATOM_INTEGRATED_SYSTEM_INFO_V1_8 sIntegratedSysInfo; // refer to ATOM_INTEGRATED_SYSTEM_INFO_V1_8 definition ++ ULONG ulPowerplayTable[128]; // Update comments here to link new powerplay table 
definition structure ++}ATOM_FUSION_SYSTEM_INFO_V2; ++ ++ ++typedef struct _ATOM_FUSION_SYSTEM_INFO_V3 ++{ ++ ATOM_INTEGRATED_SYSTEM_INFO_V1_10 sIntegratedSysInfo; // refer to ATOM_INTEGRATED_SYSTEM_INFO_V1_8 definition ++ ULONG ulPowerplayTable[192]; // Reserve 768 bytes space for PowerPlayInfoTable ++}ATOM_FUSION_SYSTEM_INFO_V3; ++ ++#define FUSION_V3_OFFSET_FROM_TOP_OF_FB 0x800 ++ + /**************************************************************************/ + // This portion is only used when ext thermal chip or engine/memory clock SS chip is populated on a design + //Memory SS Info Table +@@ -6193,12 +6754,12 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 + #define ATOM_S3_DFP1_ACTIVE 0x00000008L + #define ATOM_S3_CRT2_ACTIVE 0x00000010L + #define ATOM_S3_LCD2_ACTIVE 0x00000020L +-#define ATOM_S3_DFP6_ACTIVE 0x00000040L ++#define ATOM_S3_DFP6_ACTIVE 0x00000040L + #define ATOM_S3_DFP2_ACTIVE 0x00000080L + #define ATOM_S3_CV_ACTIVE 0x00000100L +-#define ATOM_S3_DFP3_ACTIVE 0x00000200L +-#define ATOM_S3_DFP4_ACTIVE 0x00000400L +-#define ATOM_S3_DFP5_ACTIVE 0x00000800L ++#define ATOM_S3_DFP3_ACTIVE 0x00000200L ++#define ATOM_S3_DFP4_ACTIVE 0x00000400L ++#define ATOM_S3_DFP5_ACTIVE 0x00000800L + + + #define ATOM_S3_DEVICE_ACTIVE_MASK 0x00000FFFL +@@ -6215,9 +6776,9 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 + #define ATOM_S3_DFP6_CRTC_ACTIVE 0x00400000L + #define ATOM_S3_DFP2_CRTC_ACTIVE 0x00800000L + #define ATOM_S3_CV_CRTC_ACTIVE 0x01000000L +-#define ATOM_S3_DFP3_CRTC_ACTIVE 0x02000000L +-#define ATOM_S3_DFP4_CRTC_ACTIVE 0x04000000L +-#define ATOM_S3_DFP5_CRTC_ACTIVE 0x08000000L ++#define ATOM_S3_DFP3_CRTC_ACTIVE 0x02000000L ++#define ATOM_S3_DFP4_CRTC_ACTIVE 0x04000000L ++#define ATOM_S3_DFP5_CRTC_ACTIVE 0x08000000L + + + #define ATOM_S3_DEVICE_CRTC_ACTIVE_MASK 0x0FFF0000L +@@ -6238,9 +6799,9 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 + #define ATOM_S3_DFP6_ACTIVEb0 0x40 + #define ATOM_S3_DFP2_ACTIVEb0 0x80 + #define ATOM_S3_CV_ACTIVEb1 0x01 
+-#define ATOM_S3_DFP3_ACTIVEb1 0x02 +-#define ATOM_S3_DFP4_ACTIVEb1 0x04 +-#define ATOM_S3_DFP5_ACTIVEb1 0x08 ++#define ATOM_S3_DFP3_ACTIVEb1 0x02 ++#define ATOM_S3_DFP4_ACTIVEb1 0x04 ++#define ATOM_S3_DFP5_ACTIVEb1 0x08 + + + #define ATOM_S3_ACTIVE_CRTC1w0 0xFFF +@@ -6254,9 +6815,9 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 + #define ATOM_S3_DFP6_CRTC_ACTIVEb2 0x40 + #define ATOM_S3_DFP2_CRTC_ACTIVEb2 0x80 + #define ATOM_S3_CV_CRTC_ACTIVEb3 0x01 +-#define ATOM_S3_DFP3_CRTC_ACTIVEb3 0x02 +-#define ATOM_S3_DFP4_CRTC_ACTIVEb3 0x04 +-#define ATOM_S3_DFP5_CRTC_ACTIVEb3 0x08 ++#define ATOM_S3_DFP3_CRTC_ACTIVEb3 0x02 ++#define ATOM_S3_DFP4_CRTC_ACTIVEb3 0x04 ++#define ATOM_S3_DFP5_CRTC_ACTIVEb3 0x08 + + + #define ATOM_S3_ACTIVE_CRTC2w1 0xFFF +@@ -6878,15 +7439,18 @@ typedef struct _ATOM_MC_INIT_PARAM_TABLE_V2_1 + #define _32Mx16 0x32 + #define _32Mx32 0x33 + #define _32Mx128 0x35 +-#define _64Mx32 0x43 + #define _64Mx8 0x41 + #define _64Mx16 0x42 ++#define _64Mx32 0x43 ++#define _64Mx128 0x45 + #define _128Mx8 0x51 + #define _128Mx16 0x52 + #define _128Mx32 0x53 + #define _256Mx8 0x61 + #define _256Mx16 0x62 ++#define _256Mx32 0x63 + #define _512Mx8 0x71 ++#define _512Mx16 0x72 + + + #define SAMSUNG 0x1 +@@ -7407,6 +7971,17 @@ typedef struct _ATOM_MEMORY_TRAINING_INFO + }ATOM_MEMORY_TRAINING_INFO; + + ++typedef struct _ATOM_MEMORY_TRAINING_INFO_V3_1 ++{ ++ ATOM_COMMON_TABLE_HEADER sHeader; ++ ULONG ulMCUcodeVersion; ++ USHORT usMCIOInitLen; //len of ATOM_REG_INIT_SETTING array ++ USHORT usMCUcodeLen; //len of ATOM_MC_UCODE_DATA array ++ USHORT usMCIORegInitOffset; //point of offset of ATOM_REG_INIT_SETTING array ++ USHORT usMCUcodeOffset; //point of offset of MC uCode ULONG array. 
++}ATOM_MEMORY_TRAINING_INFO_V3_1; ++ ++ + typedef struct SW_I2C_CNTL_DATA_PARAMETERS + { + UCHAR ucControl; +@@ -7623,7 +8198,7 @@ typedef struct _ASIC_TRANSMITTER_INFO + { + USHORT usTransmitterObjId; + USHORT usSupportDevice; +- UCHAR ucTransmitterCmdTblId; ++ UCHAR ucTransmitterCmdTblId; + UCHAR ucConfig; + UCHAR ucEncoderID; //available 1st encoder ( default ) + UCHAR ucOptionEncoderID; //available 2nd encoder ( optional ) +diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h +index aec38fc..a461e15 100644 +--- a/drivers/gpu/drm/amd/include/cgs_common.h ++++ b/drivers/gpu/drm/amd/include/cgs_common.h +@@ -26,6 +26,8 @@ + + #include "amd_shared.h" + ++struct cgs_device; ++ + /** + * enum cgs_gpu_mem_type - GPU memory types + */ +@@ -92,6 +94,7 @@ enum cgs_voltage_planes { + */ + enum cgs_ucode_id { + CGS_UCODE_ID_SMU = 0, ++ CGS_UCODE_ID_SMU_SK, + CGS_UCODE_ID_SDMA0, + CGS_UCODE_ID_SDMA1, + CGS_UCODE_ID_CP_CE, +@@ -111,6 +114,7 @@ enum cgs_system_info_id { + CGS_SYSTEM_INFO_PCIE_MLW, + CGS_SYSTEM_INFO_CG_FLAGS, + CGS_SYSTEM_INFO_PG_FLAGS, ++ CGS_SYSTEM_INFO_GFX_CU_INFO, + CGS_SYSTEM_INFO_ID_MAXIMUM, + }; + +@@ -223,7 +227,7 @@ struct cgs_acpi_method_info { + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_gpu_mem_info_t)(void *cgs_device, enum cgs_gpu_mem_type type, ++typedef int (*cgs_gpu_mem_info_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, + uint64_t *mc_start, uint64_t *mc_size, + uint64_t *mem_size); + +@@ -239,7 +243,7 @@ typedef int (*cgs_gpu_mem_info_t)(void *cgs_device, enum cgs_gpu_mem_type type, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_gmap_kmem_t)(void *cgs_device, void *kmem, uint64_t size, ++typedef int (*cgs_gmap_kmem_t)(struct cgs_device *cgs_device, void *kmem, uint64_t size, + uint64_t min_offset, uint64_t max_offset, + cgs_handle_t *kmem_handle, uint64_t *mcaddr); + +@@ -250,7 +254,7 @@ typedef int (*cgs_gmap_kmem_t)(void 
*cgs_device, void *kmem, uint64_t size, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_gunmap_kmem_t)(void *cgs_device, cgs_handle_t kmem_handle); ++typedef int (*cgs_gunmap_kmem_t)(struct cgs_device *cgs_device, cgs_handle_t kmem_handle); + + /** + * cgs_alloc_gpu_mem() - Allocate GPU memory +@@ -279,7 +283,7 @@ typedef int (*cgs_gunmap_kmem_t)(void *cgs_device, cgs_handle_t kmem_handle); + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_alloc_gpu_mem_t)(void *cgs_device, enum cgs_gpu_mem_type type, ++typedef int (*cgs_alloc_gpu_mem_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, + uint64_t size, uint64_t align, + uint64_t min_offset, uint64_t max_offset, + cgs_handle_t *handle); +@@ -291,7 +295,7 @@ typedef int (*cgs_alloc_gpu_mem_t)(void *cgs_device, enum cgs_gpu_mem_type type, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_free_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); ++typedef int (*cgs_free_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle); + + /** + * cgs_gmap_gpu_mem() - GPU-map GPU memory +@@ -303,7 +307,7 @@ typedef int (*cgs_free_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_gmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, ++typedef int (*cgs_gmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle, + uint64_t *mcaddr); + + /** +@@ -315,7 +319,7 @@ typedef int (*cgs_gmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_gunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); ++typedef int (*cgs_gunmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle); + + /** + * cgs_kmap_gpu_mem() - Kernel-map GPU memory +@@ -326,7 +330,7 @@ typedef int (*cgs_gunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); + * + * Return: 0 on success, -errno otherwise + */ +-typedef int 
(*cgs_kmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, ++typedef int (*cgs_kmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle, + void **map); + + /** +@@ -336,7 +340,7 @@ typedef int (*cgs_kmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_kunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); ++typedef int (*cgs_kunmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle); + + /** + * cgs_read_register() - Read an MMIO register +@@ -345,7 +349,7 @@ typedef int (*cgs_kunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); + * + * Return: register value + */ +-typedef uint32_t (*cgs_read_register_t)(void *cgs_device, unsigned offset); ++typedef uint32_t (*cgs_read_register_t)(struct cgs_device *cgs_device, unsigned offset); + + /** + * cgs_write_register() - Write an MMIO register +@@ -353,7 +357,7 @@ typedef uint32_t (*cgs_read_register_t)(void *cgs_device, unsigned offset); + * @offset: register offset + * @value: register value + */ +-typedef void (*cgs_write_register_t)(void *cgs_device, unsigned offset, ++typedef void (*cgs_write_register_t)(struct cgs_device *cgs_device, unsigned offset, + uint32_t value); + + /** +@@ -363,7 +367,7 @@ typedef void (*cgs_write_register_t)(void *cgs_device, unsigned offset, + * + * Return: register value + */ +-typedef uint32_t (*cgs_read_ind_register_t)(void *cgs_device, enum cgs_ind_reg space, ++typedef uint32_t (*cgs_read_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space, + unsigned index); + + /** +@@ -372,7 +376,7 @@ typedef uint32_t (*cgs_read_ind_register_t)(void *cgs_device, enum cgs_ind_reg s + * @offset: register offset + * @value: register value + */ +-typedef void (*cgs_write_ind_register_t)(void *cgs_device, enum cgs_ind_reg space, ++typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space, + unsigned index, uint32_t value); + + /** +@@ -382,7 
+386,7 @@ typedef void (*cgs_write_ind_register_t)(void *cgs_device, enum cgs_ind_reg spac + * + * Return: Value read + */ +-typedef uint8_t (*cgs_read_pci_config_byte_t)(void *cgs_device, unsigned addr); ++typedef uint8_t (*cgs_read_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr); + + /** + * cgs_read_pci_config_word() - Read word from PCI configuration space +@@ -391,7 +395,7 @@ typedef uint8_t (*cgs_read_pci_config_byte_t)(void *cgs_device, unsigned addr); + * + * Return: Value read + */ +-typedef uint16_t (*cgs_read_pci_config_word_t)(void *cgs_device, unsigned addr); ++typedef uint16_t (*cgs_read_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr); + + /** + * cgs_read_pci_config_dword() - Read dword from PCI configuration space +@@ -400,7 +404,7 @@ typedef uint16_t (*cgs_read_pci_config_word_t)(void *cgs_device, unsigned addr); + * + * Return: Value read + */ +-typedef uint32_t (*cgs_read_pci_config_dword_t)(void *cgs_device, ++typedef uint32_t (*cgs_read_pci_config_dword_t)(struct cgs_device *cgs_device, + unsigned addr); + + /** +@@ -409,7 +413,7 @@ typedef uint32_t (*cgs_read_pci_config_dword_t)(void *cgs_device, + * @addr: address + * @value: value to write + */ +-typedef void (*cgs_write_pci_config_byte_t)(void *cgs_device, unsigned addr, ++typedef void (*cgs_write_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr, + uint8_t value); + + /** +@@ -418,7 +422,7 @@ typedef void (*cgs_write_pci_config_byte_t)(void *cgs_device, unsigned addr, + * @addr: address, must be word-aligned + * @value: value to write + */ +-typedef void (*cgs_write_pci_config_word_t)(void *cgs_device, unsigned addr, ++typedef void (*cgs_write_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr, + uint16_t value); + + /** +@@ -427,7 +431,7 @@ typedef void (*cgs_write_pci_config_word_t)(void *cgs_device, unsigned addr, + * @addr: address, must be dword-aligned + * @value: value to write + */ +-typedef void 
(*cgs_write_pci_config_dword_t)(void *cgs_device, unsigned addr, ++typedef void (*cgs_write_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr, + uint32_t value); + + +@@ -441,7 +445,7 @@ typedef void (*cgs_write_pci_config_dword_t)(void *cgs_device, unsigned addr, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_get_pci_resource_t)(void *cgs_device, ++typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device, + enum cgs_resource_type resource_type, + uint64_t size, + uint64_t offset, +@@ -458,7 +462,7 @@ typedef int (*cgs_get_pci_resource_t)(void *cgs_device, + * Return: Pointer to start of the table, or NULL on failure + */ + typedef const void *(*cgs_atom_get_data_table_t)( +- void *cgs_device, unsigned table, ++ struct cgs_device *cgs_device, unsigned table, + uint16_t *size, uint8_t *frev, uint8_t *crev); + + /** +@@ -470,7 +474,7 @@ typedef const void *(*cgs_atom_get_data_table_t)( + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_atom_get_cmd_table_revs_t)(void *cgs_device, unsigned table, ++typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table, + uint8_t *frev, uint8_t *crev); + + /** +@@ -481,7 +485,7 @@ typedef int (*cgs_atom_get_cmd_table_revs_t)(void *cgs_device, unsigned table, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_atom_exec_cmd_table_t)(void *cgs_device, ++typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device, + unsigned table, void *args); + + /** +@@ -491,7 +495,7 @@ typedef int (*cgs_atom_exec_cmd_table_t)(void *cgs_device, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_create_pm_request_t)(void *cgs_device, cgs_handle_t *request); ++typedef int (*cgs_create_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t *request); + + /** + * cgs_destroy_pm_request() - Destroy a power management request +@@ -500,7 +504,7 @@ typedef int (*cgs_create_pm_request_t)(void 
*cgs_device, cgs_handle_t *request); + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_destroy_pm_request_t)(void *cgs_device, cgs_handle_t request); ++typedef int (*cgs_destroy_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request); + + /** + * cgs_set_pm_request() - Activate or deactiveate a PM request +@@ -516,7 +520,7 @@ typedef int (*cgs_destroy_pm_request_t)(void *cgs_device, cgs_handle_t request); + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_set_pm_request_t)(void *cgs_device, cgs_handle_t request, ++typedef int (*cgs_set_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request, + int active); + + /** +@@ -528,7 +532,7 @@ typedef int (*cgs_set_pm_request_t)(void *cgs_device, cgs_handle_t request, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_pm_request_clock_t)(void *cgs_device, cgs_handle_t request, ++typedef int (*cgs_pm_request_clock_t)(struct cgs_device *cgs_device, cgs_handle_t request, + enum cgs_clock clock, unsigned freq); + + /** +@@ -540,7 +544,7 @@ typedef int (*cgs_pm_request_clock_t)(void *cgs_device, cgs_handle_t request, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_pm_request_engine_t)(void *cgs_device, cgs_handle_t request, ++typedef int (*cgs_pm_request_engine_t)(struct cgs_device *cgs_device, cgs_handle_t request, + enum cgs_engine engine, int powered); + + /** +@@ -551,7 +555,7 @@ typedef int (*cgs_pm_request_engine_t)(void *cgs_device, cgs_handle_t request, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_pm_query_clock_limits_t)(void *cgs_device, ++typedef int (*cgs_pm_query_clock_limits_t)(struct cgs_device *cgs_device, + enum cgs_clock clock, + struct cgs_clock_limits *limits); + +@@ -563,7 +567,7 @@ typedef int (*cgs_pm_query_clock_limits_t)(void *cgs_device, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_set_camera_voltages_t)(void *cgs_device, uint32_t mask, 
++typedef int (*cgs_set_camera_voltages_t)(struct cgs_device *cgs_device, uint32_t mask, + const uint32_t *voltages); + /** + * cgs_get_firmware_info - Get the firmware information from core driver +@@ -573,23 +577,25 @@ typedef int (*cgs_set_camera_voltages_t)(void *cgs_device, uint32_t mask, + * + * Return: 0 on success, -errno otherwise + */ +-typedef int (*cgs_get_firmware_info)(void *cgs_device, ++typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device, + enum cgs_ucode_id type, + struct cgs_firmware_info *info); + +-typedef int(*cgs_set_powergating_state)(void *cgs_device, ++typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device, + enum amd_ip_block_type block_type, + enum amd_powergating_state state); + +-typedef int(*cgs_set_clockgating_state)(void *cgs_device, ++typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state); + + typedef int(*cgs_get_active_displays_info)( +- void *cgs_device, ++ struct cgs_device *cgs_device, + struct cgs_display_info *info); + +-typedef int (*cgs_call_acpi_method)(void *cgs_device, ++typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled); ++ ++typedef int (*cgs_call_acpi_method)(struct cgs_device *cgs_device, + uint32_t acpi_method, + uint32_t acpi_function, + void *pinput, void *poutput, +@@ -597,7 +603,7 @@ typedef int (*cgs_call_acpi_method)(void *cgs_device, + uint32_t input_size, + uint32_t output_size); + +-typedef int (*cgs_query_system_info)(void *cgs_device, ++typedef int (*cgs_query_system_info)(struct cgs_device *cgs_device, + struct cgs_system_info *sys_info); + + struct cgs_ops { +@@ -644,6 +650,8 @@ struct cgs_ops { + cgs_set_clockgating_state set_clockgating_state; + /* display manager */ + cgs_get_active_displays_info get_active_displays_info; ++ /* notify dpm enabled */ ++ cgs_notify_dpm_enabled notify_dpm_enabled; + /* ACPI */ + cgs_call_acpi_method call_acpi_method; + /* 
get system info */ +@@ -734,8 +742,12 @@ struct cgs_device + CGS_CALL(set_powergating_state, dev, block_type, state) + #define cgs_set_clockgating_state(dev, block_type, state) \ + CGS_CALL(set_clockgating_state, dev, block_type, state) ++#define cgs_notify_dpm_enabled(dev, enabled) \ ++ CGS_CALL(notify_dpm_enabled, dev, enabled) ++ + #define cgs_get_active_displays_info(dev, info) \ + CGS_CALL(get_active_displays_info, dev, info) ++ + #define cgs_call_acpi_method(dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size) \ + CGS_CALL(call_acpi_method, dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size) + #define cgs_query_system_info(dev, sys_info) \ +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc index 94b09fb9..f0addb32 100644 --- a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc +++ b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc @@ -1461,6 +1461,8 @@ patch 1569-ACP-code-as-per-4-1-0-kernel.patch patch 1570-4-1-0-kernel-s-drivers-I2C-directory.patch patch 1571-Add-support-for-amd-gnb-bus.patch patch 1572-drm-amdgpu-fix-num_rbs-exposed-to-userspace.patch +patch 1573-Add-power-gating-initialization-support-for-GFX8.0.patch patch 0300-amd-powerplay-handle-power-management-state-based-on.patch patch 0001-ALSA-hda-add-AMD-Stoney-PCI-ID-with-proper-driver-ca.patch patch 0001-amdgpu-fix-various-compilation-issues.patch +patch 0001-fs-prioritize-ext4-rootfs-type.patch |