31 files changed, 31278 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0001-winsys-radeon-make-radeon_bo_vtbl-static.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0001-winsys-radeon-make-radeon_bo_vtbl-static.patch
new file mode 100644
index 00000000..a6abee97
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0001-winsys-radeon-make-radeon_bo_vtbl-static.patch
@@ -0,0 +1,35 @@
+From 65a1b608e285322358732911e41fce4fe89a5e07 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 19:09:57 +0200
+Subject: [PATCH 01/29] winsys/radeon: make radeon_bo_vtbl static
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+index e609d68..061c814 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+@@ -42,7 +42,7 @@
+ #include <fcntl.h>
+ #include <stdio.h>
+ 
+-extern const struct pb_vtbl radeon_bo_vtbl;
++static const struct pb_vtbl radeon_bo_vtbl;
+ 
+ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
+ {
+@@ -471,7 +471,7 @@ static void radeon_bo_fence(struct pb_buffer *buf,
+ {
+ }
+ 
+-const struct pb_vtbl radeon_bo_vtbl = {
++static const struct pb_vtbl radeon_bo_vtbl = {
+     radeon_bo_destroy,
+     NULL, /* never called */
+     NULL, /* never called */
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0002-gallium-radeon-print-winsys-info-with-R600_DEBUG-inf.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0002-gallium-radeon-print-winsys-info-with-R600_DEBUG-inf.patch
new file mode 100644
index 00000000..989b7a0d
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0002-gallium-radeon-print-winsys-info-with-R600_DEBUG-inf.patch
@@ -0,0 +1,71 @@
+From f3caa583548e36334891974a337d7e932d4260b3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:15:16 +0200
+Subject: [PATCH 02/29] gallium/radeon: print winsys info with R600_DEBUG=info
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/r600_pipe_common.c | 27 +++++++++++++++++++++++++++
+ src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
+ 2 files changed, 28 insertions(+)
+
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
+index 2b27e0a..c6d7918 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.c
++++ b/src/gallium/drivers/radeon/r600_pipe_common.c
+@@ -306,6 +306,7 @@ static const struct debug_named_value common_debug_options[] = {
+ 	{ "compute", DBG_COMPUTE, "Print compute info" },
+ 	{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
+ 	{ "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
++	{ "info", DBG_INFO, "Print driver information" },
+ 
+ 	/* shaders */
+ 	{ "fs", DBG_FS, "Print fetch shaders" },
+@@ -874,6 +875,32 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
+ 		}
+ 	}
+ 
++	if (rscreen->debug_flags & DBG_INFO) {
++		printf("pci_id = 0x%x\n", rscreen->info.pci_id);
++		printf("family = %i\n", rscreen->info.family);
++		printf("chip_class = %i\n", rscreen->info.chip_class);
++		printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
++		printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
++		printf("max_sclk = %i\n", rscreen->info.max_sclk);
++		printf("max_compute_units = %i\n", rscreen->info.max_compute_units);
++		printf("max_se = %i\n", rscreen->info.max_se);
++		printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
++		printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
++		       rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
++		printf("has_uvd = %i\n", rscreen->info.has_uvd);
++		printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
++		printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
++		printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
++		printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
++		printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
++		printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
++		printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
++		printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
++		printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
++		printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
++		printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
++		printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
++	}
+ 	return true;
+ }
+ 
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
+index febd2a1..384a9a6 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.h
++++ b/src/gallium/drivers/radeon/r600_pipe_common.h
+@@ -84,6 +84,7 @@
+ #define DBG_SWITCH_ON_EOP	(1 << 15)
+ #define DBG_FORCE_DMA		(1 << 16)
+ #define DBG_PRECOMPILE		(1 << 17)
++#define DBG_INFO		(1 << 18)
+ /* The maximum allowed bit is 20. */
+ 
+ #define R600_MAP_BUFFER_ALIGNMENT 64
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0003-radeonsi-remove-useless-includes.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0003-radeonsi-remove-useless-includes.patch
new file mode 100644
index 00000000..28736b8d
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0003-radeonsi-remove-useless-includes.patch
@@ -0,0 +1,27 @@
+From 2eb1c8e83edfa6ebb0603e20813136bd696bea01 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:16:35 +0200
+Subject: [PATCH 03/29] radeonsi: remove useless includes
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeonsi/si_pipe.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
+index 5dc657c..e68c30e 100644
+--- a/src/gallium/drivers/radeonsi/si_pipe.c
++++ b/src/gallium/drivers/radeonsi/si_pipe.c
+@@ -30,9 +30,6 @@
+ #include "util/u_memory.h"
+ #include "vl/vl_decoder.h"
+ 
+-#include <llvm-c/Target.h>
+-#include <llvm-c/TargetMachine.h>
+-
+ /*
+  * pipe_context
+  */
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0004-radeonsi-remove-deprecated-and-useless-registers.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0004-radeonsi-remove-deprecated-and-useless-registers.patch
new file mode 100644
index 00000000..0bc7a57d
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0004-radeonsi-remove-deprecated-and-useless-registers.patch
@@ -0,0 +1,36 @@
+From 3c24679d5732c8b90e793537cd43c69a3a4d0618 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:37:45 +0200
+Subject: [PATCH 04/29] radeonsi: remove deprecated and useless registers
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeonsi/si_state.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index 4bb6f2b..f24cbbd 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -3035,18 +3035,8 @@ void si_init_config(struct si_context *sctx)
+ 
+ 	si_cmd_context_control(pm4);
+ 
+-	si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
+-	si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
+ 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
+ 	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
+-	si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
+-	si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
+-	si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
+-	si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
+-	si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
+-	si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
+-	si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
+-	si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);
+ 
+ 	/* FIXME calculate these values somehow ??? */
+ 	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0005-radeonsi-set-an-optimal-value-for-DB_Z_INFO_ZRANGE_P.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0005-radeonsi-set-an-optimal-value-for-DB_Z_INFO_ZRANGE_P.patch
new file mode 100644
index 00000000..7148d2ee
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0005-radeonsi-set-an-optimal-value-for-DB_Z_INFO_ZRANGE_P.patch
@@ -0,0 +1,43 @@
+From a20e66cd94137e196ae5ef627b8d71d5bc6a52ed Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:40:31 +0200
+Subject: [PATCH 05/29] radeonsi: set an optimal value for
+ DB_Z_INFO_ZRANGE_PRECISION
+
+Required because of a VI hw bug.
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeonsi/si_state.c | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index f24cbbd..dc19d29 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -1948,12 +1948,6 @@ static void si_init_depth_surface(struct si_context *sctx,
+ 		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
+ 			  S_028040_ALLOW_EXPCLEAR(1);
+ 
+-		/* This is optimal for the clear value of 1.0 and using
+-		 * the LESS and LEQUAL test functions. Set this to 0
+-		 * for the opposite case. This can only be changed when
+-		 * clearing. */
+-		z_info |= S_028040_ZRANGE_PRECISION(1);
+-
+ 		/* Use all of the htile_buffer for depth, because we don't
+ 		 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
+ 		s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+@@ -2183,7 +2177,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
+ 
+ 		r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
+ 		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
+-		radeon_emit(cs, zb->db_z_info);		/* R_028040_DB_Z_INFO */
++		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
++			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
+ 		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
+ 		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
+ 		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0006-winsys-radeon-move-radeon_winsys.h-up-one-directory.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0006-winsys-radeon-move-radeon_winsys.h-up-one-directory.patch
new file mode 100644
index 00000000..90c1f976
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0006-winsys-radeon-move-radeon_winsys.h-up-one-directory.patch
@@ -0,0 +1,1483 @@
+From f31b21f2e2ec8f5ab61740ced586eb04dd4dcb37 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 22:50:33 +0200
+Subject: [PATCH 06/29] winsys/radeon: move radeon_winsys.h up one directory
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ .../auxiliary/target-helpers/inline_drm_helper.h   |   6 +-
+ src/gallium/drivers/r300/r300_chipset.c            |   2 +-
+ src/gallium/drivers/r300/r300_context.h            |   1 -
+ src/gallium/drivers/r300/r300_screen.h             |   2 +-
+ src/gallium/drivers/radeon/r600_pipe_common.h      |   2 +-
+ src/gallium/drivers/radeon/radeon_uvd.c            |   1 -
+ src/gallium/drivers/radeon/radeon_uvd.h            |   2 +-
+ src/gallium/drivers/radeon/radeon_vce.c            |   1 -
+ src/gallium/drivers/radeon/radeon_vce_40_2_2.c     |   1 -
+ src/gallium/drivers/radeon/radeon_video.c          |   1 -
+ src/gallium/drivers/radeon/radeon_video.h          |   2 +-
+ src/gallium/drivers/radeonsi/si_pm4.h              |   2 +-
+ src/gallium/targets/pipe-loader/pipe_r300.c        |   2 +-
+ src/gallium/targets/pipe-loader/pipe_r600.c        |   2 +-
+ src/gallium/targets/pipe-loader/pipe_radeonsi.c    |   2 +-
+ src/gallium/winsys/radeon/drm/Makefile.sources     |   2 +-
+ src/gallium/winsys/radeon/drm/radeon_drm_winsys.h  |   2 +-
+ src/gallium/winsys/radeon/drm/radeon_winsys.h      | 604 ---------------------
+ src/gallium/winsys/radeon/radeon_winsys.h          | 604 +++++++++++++++++++++
+ 19 files changed, 618 insertions(+), 623 deletions(-)
+ delete mode 100644 src/gallium/winsys/radeon/drm/radeon_winsys.h
+ create mode 100644 src/gallium/winsys/radeon/radeon_winsys.h
+
+diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+index 542ad43..d3c331d 100644
+--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
++++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+@@ -28,19 +28,19 @@
+ #endif
+ 
+ #if GALLIUM_R300
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "radeon/drm/radeon_drm_public.h"
+ #include "r300/r300_public.h"
+ #endif
+ 
+ #if GALLIUM_R600
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "radeon/drm/radeon_drm_public.h"
+ #include "r600/r600_public.h"
+ #endif
+ 
+ #if GALLIUM_RADEONSI
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "radeon/drm/radeon_drm_public.h"
+ #include "radeonsi/si_public.h"
+ #endif
+diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
+index 7a83611..c1c7ce3 100644
+--- a/src/gallium/drivers/r300/r300_chipset.c
++++ b/src/gallium/drivers/r300/r300_chipset.c
+@@ -22,7 +22,7 @@
+  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+ 
+ #include "r300_chipset.h"
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ 
+ #include "util/u_debug.h"
+ #include "util/u_memory.h"
+diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
+index 4d2b153..3873c9a 100644
+--- a/src/gallium/drivers/r300/r300_context.h
++++ b/src/gallium/drivers/r300/r300_context.h
+@@ -36,7 +36,6 @@
+ #include "r300_defines.h"
+ #include "r300_screen.h"
+ #include "compiler/radeon_regalloc.h"
+-#include "radeon/drm/radeon_winsys.h"
+ 
+ struct u_upload_mgr;
+ struct r300_context;
+diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
+index f0dd3c6..7bba39b 100644
+--- a/src/gallium/drivers/r300/r300_screen.h
++++ b/src/gallium/drivers/r300/r300_screen.h
+@@ -25,7 +25,7 @@
+ #define R300_SCREEN_H
+ 
+ #include "r300_chipset.h"
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "pipe/p_screen.h"
+ #include "util/u_slab.h"
+ #include "os/os_thread.h"
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
+index 384a9a6..b7df001 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.h
++++ b/src/gallium/drivers/radeon/r600_pipe_common.h
+@@ -34,7 +34,7 @@
+ 
+ #include <stdio.h>
+ 
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ 
+ #include "util/u_blitter.h"
+ #include "util/u_double_list.h"
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
+index 9668d7d..4d4b54b 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.c
++++ b/src/gallium/drivers/radeon/radeon_uvd.c
+@@ -45,7 +45,6 @@
+ #include "vl/vl_defines.h"
+ #include "vl/vl_mpeg12_decoder.h"
+ 
+-#include "radeon/drm/radeon_winsys.h"
+ #include "r600_pipe_common.h"
+ #include "radeon_video.h"
+ #include "radeon_uvd.h"
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
+index 462b101..41a6fb4 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.h
++++ b/src/gallium/drivers/radeon/radeon_uvd.h
+@@ -34,7 +34,7 @@
+ #ifndef RADEON_UVD_H
+ #define RADEON_UVD_H
+ 
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "vl/vl_video_buffer.h"
+ 
+ /* UVD uses PM4 packet type 0 and 2 */
+diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
+index 6d34bd3..5f710e6 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.c
++++ b/src/gallium/drivers/radeon/radeon_vce.c
+@@ -40,7 +40,6 @@
+ 
+ #include "vl/vl_video_buffer.h"
+ 
+-#include "radeon/drm/radeon_winsys.h"
+ #include "r600_pipe_common.h"
+ #include "radeon_video.h"
+ #include "radeon_vce.h"
+diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+index b176aa7..0902957 100644
+--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
++++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+@@ -40,7 +40,6 @@
+ 
+ #include "vl/vl_video_buffer.h"
+ 
+-#include "radeon/drm/radeon_winsys.h"
+ #include "r600_pipe_common.h"
+ #include "radeon_video.h"
+ #include "radeon_vce.h"
+diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
+index 1420798..6ec10c1 100644
+--- a/src/gallium/drivers/radeon/radeon_video.c
++++ b/src/gallium/drivers/radeon/radeon_video.c
+@@ -39,7 +39,6 @@
+ #include "vl/vl_defines.h"
+ #include "vl/vl_video_buffer.h"
+ 
+-#include "radeon/drm/radeon_winsys.h"
+ #include "r600_pipe_common.h"
+ #include "radeon_video.h"
+ #include "radeon_vce.h"
+diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h
+index 974ea4f..6d0ff28 100644
+--- a/src/gallium/drivers/radeon/radeon_video.h
++++ b/src/gallium/drivers/radeon/radeon_video.h
+@@ -34,7 +34,7 @@
+ #ifndef RADEON_VIDEO_H
+ #define RADEON_VIDEO_H
+ 
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "vl/vl_video_buffer.h"
+ 
+ #define RVID_ERR(fmt, args...) \
+diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
+index bfb5562..d215882 100644
+--- a/src/gallium/drivers/radeonsi/si_pm4.h
++++ b/src/gallium/drivers/radeonsi/si_pm4.h
+@@ -27,7 +27,7 @@
+ #ifndef SI_PM4_H
+ #define SI_PM4_H
+ 
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ 
+ #define SI_PM4_MAX_DW		256
+ #define SI_PM4_MAX_BO		32
+diff --git a/src/gallium/targets/pipe-loader/pipe_r300.c b/src/gallium/targets/pipe-loader/pipe_r300.c
+index abcade4..368b8c2 100644
+--- a/src/gallium/targets/pipe-loader/pipe_r300.c
++++ b/src/gallium/targets/pipe-loader/pipe_r300.c
+@@ -1,7 +1,7 @@
+ #include "target-helpers/inline_debug_helper.h"
+ #include "state_tracker/drm_driver.h"
+ #include "radeon/drm/radeon_drm_public.h"
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "r300/r300_public.h"
+ 
+ static struct pipe_screen *
+diff --git a/src/gallium/targets/pipe-loader/pipe_r600.c b/src/gallium/targets/pipe-loader/pipe_r600.c
+index eb53637..65b11c8 100644
+--- a/src/gallium/targets/pipe-loader/pipe_r600.c
++++ b/src/gallium/targets/pipe-loader/pipe_r600.c
+@@ -1,7 +1,7 @@
+ #include "state_tracker/drm_driver.h"
+ #include "target-helpers/inline_debug_helper.h"
+ #include "radeon/drm/radeon_drm_public.h"
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "r600/r600_public.h"
+ 
+ static struct pipe_screen *
+diff --git a/src/gallium/targets/pipe-loader/pipe_radeonsi.c b/src/gallium/targets/pipe-loader/pipe_radeonsi.c
+index 1dcd781..5457b5b 100644
+--- a/src/gallium/targets/pipe-loader/pipe_radeonsi.c
++++ b/src/gallium/targets/pipe-loader/pipe_radeonsi.c
+@@ -1,7 +1,7 @@
+ #include "state_tracker/drm_driver.h"
+ #include "target-helpers/inline_debug_helper.h"
+ #include "radeon/drm/radeon_drm_public.h"
+-#include "radeon/drm/radeon_winsys.h"
++#include "radeon/radeon_winsys.h"
+ #include "radeonsi/si_public.h"
+ 
+ static struct pipe_screen *
+diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources
+index d30969e..ced788b 100644
+--- a/src/gallium/winsys/radeon/drm/Makefile.sources
++++ b/src/gallium/winsys/radeon/drm/Makefile.sources
+@@ -7,7 +7,7 @@ C_SOURCES := \
+ 	radeon_drm_public.h \
+ 	radeon_drm_winsys.c \
+ 	radeon_drm_winsys.h \
+-	radeon_winsys.h
++	../radeon_winsys.h
+ 
+ TOOLS_HDR := \
+ 	radeon_ctx.h
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+index 5711ffa..70657be 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+@@ -30,7 +30,7 @@
+ #ifndef RADEON_DRM_WINSYS_H
+ #define RADEON_DRM_WINSYS_H
+ 
+-#include "radeon_winsys.h"
++#include "../radeon_winsys.h"
+ #include "os/os_thread.h"
+ #include <radeon_drm.h>
+ 
+diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
+deleted file mode 100644
+index a8cc60a..0000000
+--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
++++ /dev/null
+@@ -1,604 +0,0 @@
+-/*
+- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+- * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a
+- * copy of this software and associated documentation files (the "Software"),
+- * to deal in the Software without restriction, including without limitation
+- * on the rights to use, copy, modify, merge, publish, distribute, sub
+- * license, and/or sell copies of the Software, and to permit persons to whom
+- * the Software is furnished to do so, subject to the following conditions:
+- *
+- * The above copyright notice and this permission notice (including the next
+- * paragraph) shall be included in all copies or substantial portions of the
+- * Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+-
+-#ifndef RADEON_WINSYS_H
+-#define RADEON_WINSYS_H
+-
+-/* The public winsys interface header for the radeon driver. */
+-
+-/* R300 features in DRM.
+- *
+- * 2.6.0:
+- * - Hyper-Z
+- * - GB_Z_PEQ_CONFIG on rv350->r4xx
+- * - R500 FG_ALPHA_VALUE
+- *
+- * 2.8.0:
+- * - R500 US_FORMAT regs
+- * - R500 ARGB2101010 colorbuffer
+- * - CMask and AA regs
+- * - R16F/RG16F
+- */
+-
+-#include "pipebuffer/pb_buffer.h"
+-#include "radeon_surface.h"
+-
+-#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
+-
+-#define RADEON_FLUSH_ASYNC		(1 << 0)
+-#define RADEON_FLUSH_KEEP_TILING_FLAGS	(1 << 1) /* needs DRM 2.12.0 */
+-#define RADEON_FLUSH_COMPUTE		(1 << 2)
+-#define RADEON_FLUSH_END_OF_FRAME       (1 << 3)
+-
+-/* Tiling flags. */
+-enum radeon_bo_layout {
+-    RADEON_LAYOUT_LINEAR = 0,
+-    RADEON_LAYOUT_TILED,
+-    RADEON_LAYOUT_SQUARETILED,
+-
+-    RADEON_LAYOUT_UNKNOWN
+-};
+-
+-enum radeon_bo_domain { /* bitfield */
+-    RADEON_DOMAIN_GTT  = 2,
+-    RADEON_DOMAIN_VRAM = 4,
+-    RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
+-};
+-
+-enum radeon_bo_flag { /* bitfield */
+-    RADEON_FLAG_GTT_WC =        (1 << 0),
+-    RADEON_FLAG_CPU_ACCESS =    (1 << 1),
+-    RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+-};
+-
+-enum radeon_bo_usage { /* bitfield */
+-    RADEON_USAGE_READ = 2,
+-    RADEON_USAGE_WRITE = 4,
+-    RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
+-};
+-
+-enum radeon_family {
+-    CHIP_UNKNOWN = 0,
+-    CHIP_R300, /* R3xx-based cores. */
+-    CHIP_R350,
+-    CHIP_RV350,
+-    CHIP_RV370,
+-    CHIP_RV380,
+-    CHIP_RS400,
+-    CHIP_RC410,
+-    CHIP_RS480,
+-    CHIP_R420,     /* R4xx-based cores. */
+-    CHIP_R423,
+-    CHIP_R430,
+-    CHIP_R480,
+-    CHIP_R481,
+-    CHIP_RV410,
+-    CHIP_RS600,
+-    CHIP_RS690,
+-    CHIP_RS740,
+-    CHIP_RV515,    /* R5xx-based cores. */
+-    CHIP_R520,
+-    CHIP_RV530,
+-    CHIP_R580,
+-    CHIP_RV560,
+-    CHIP_RV570,
+-    CHIP_R600,
+-    CHIP_RV610,
+-    CHIP_RV630,
+-    CHIP_RV670,
+-    CHIP_RV620,
+-    CHIP_RV635,
+-    CHIP_RS780,
+-    CHIP_RS880,
+-    CHIP_RV770,
+-    CHIP_RV730,
+-    CHIP_RV710,
+-    CHIP_RV740,
+-    CHIP_CEDAR,
+-    CHIP_REDWOOD,
+-    CHIP_JUNIPER,
+-    CHIP_CYPRESS,
+-    CHIP_HEMLOCK,
+-    CHIP_PALM,
+-    CHIP_SUMO,
+-    CHIP_SUMO2,
+-    CHIP_BARTS,
+-    CHIP_TURKS,
+-    CHIP_CAICOS,
+-    CHIP_CAYMAN,
+-    CHIP_ARUBA,
+-    CHIP_TAHITI,
+-    CHIP_PITCAIRN,
+-    CHIP_VERDE,
+-    CHIP_OLAND,
+-    CHIP_HAINAN,
+-    CHIP_BONAIRE,
+-    CHIP_KAVERI,
+-    CHIP_KABINI,
+-    CHIP_HAWAII,
+-    CHIP_MULLINS,
+-    CHIP_LAST,
+-};
+-
+-enum chip_class {
+-    CLASS_UNKNOWN = 0,
+-    R300,
+-    R400,
+-    R500,
+-    R600,
+-    R700,
+-    EVERGREEN,
+-    CAYMAN,
+-    SI,
+-    CIK,
+-};
+-
+-enum ring_type {
+-    RING_GFX = 0,
+-    RING_DMA,
+-    RING_UVD,
+-    RING_VCE,
+-    RING_LAST,
+-};
+-
+-enum radeon_value_id {
+-    RADEON_REQUESTED_VRAM_MEMORY,
+-    RADEON_REQUESTED_GTT_MEMORY,
+-    RADEON_BUFFER_WAIT_TIME_NS,
+-    RADEON_TIMESTAMP,
+-    RADEON_NUM_CS_FLUSHES,
+-    RADEON_NUM_BYTES_MOVED,
+-    RADEON_VRAM_USAGE,
+-    RADEON_GTT_USAGE
+-};
+-
+-enum radeon_bo_priority {
+-    RADEON_PRIO_MIN,
+-    RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
+-    RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
+-    RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
+-    RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
+-    RADEON_PRIO_COLOR_BUFFER,
+-    RADEON_PRIO_DEPTH_BUFFER,
+-    RADEON_PRIO_SHADER_TEXTURE_MSAA,
+-    RADEON_PRIO_COLOR_BUFFER_MSAA,
+-    RADEON_PRIO_DEPTH_BUFFER_MSAA,
+-    RADEON_PRIO_COLOR_META,
+-    RADEON_PRIO_DEPTH_META,
+-    RADEON_PRIO_MAX /* must be <= 15 */
+-};
+-
+-struct winsys_handle;
+-struct radeon_winsys_cs_handle;
+-
+-struct radeon_winsys_cs {
+-    unsigned                    cdw;  /* Number of used dwords. */
+-    uint32_t                    *buf; /* The command buffer. */
+-    enum ring_type              ring_type;
+-};
+-
+-struct radeon_info {
+-    uint32_t                    pci_id;
+-    enum radeon_family          family;
+-    enum chip_class             chip_class;
+-    uint64_t                    gart_size;
+-    uint64_t                    vram_size;
+-    uint32_t                    max_sclk;
+-    uint32_t                    max_compute_units;
+-    uint32_t                    max_se;
+-    uint32_t                    max_sh_per_se;
+-
+-    uint32_t                    drm_major; /* version */
+-    uint32_t                    drm_minor;
+-    uint32_t                    drm_patchlevel;
+-
+-    boolean                     has_uvd;
+-    uint32_t                    vce_fw_version;
+-    boolean                     has_userptr;
+-
+-    uint32_t                    r300_num_gb_pipes;
+-    uint32_t                    r300_num_z_pipes;
+-
+-    uint32_t                    r600_num_backends;
+-    uint32_t                    r600_clock_crystal_freq;
+-    uint32_t                    r600_tiling_config;
+-    uint32_t                    r600_num_tile_pipes;
+-    uint32_t                    r600_max_pipes;
+-    boolean                     r600_virtual_address;
+-    boolean                     r600_has_dma;
+-
+-    uint32_t                    r600_backend_map;
+-    boolean                     r600_backend_map_valid;
+-
+-    boolean                     si_tile_mode_array_valid;
+-    uint32_t                    si_tile_mode_array[32];
+-    uint32_t                    si_backend_enabled_mask;
+-
+-    boolean                     cik_macrotile_mode_array_valid;
+-    uint32_t                    cik_macrotile_mode_array[16];
+-};
+-
+-enum radeon_feature_id {
+-    RADEON_FID_R300_HYPERZ_ACCESS,     /* ZMask + HiZ */
+-    RADEON_FID_R300_CMASK_ACCESS,
+-};
+-
+-struct radeon_winsys {
+-    /**
+-     * The screen object this winsys was created for
+-     */
+-    struct pipe_screen *screen;
+-
+-    /**
+-     * Decrement the winsys reference count.
+-     *
+-     * \param ws  The winsys this function is called for.
+-     * \return    True if the winsys and screen should be destroyed.
+-     */
+-    bool (*unref)(struct radeon_winsys *ws);
+-
+-    /**
+-     * Destroy this winsys.
+-     *
+-     * \param ws        The winsys this function is called from.
+-     */
+-    void (*destroy)(struct radeon_winsys *ws);
+-
+-    /**
+-     * Query an info structure from winsys.
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param info      Return structure
+-     */
+-    void (*query_info)(struct radeon_winsys *ws,
+-                       struct radeon_info *info);
+-
+-    /**************************************************************************
+-     * Buffer management. Buffer attributes are mostly fixed over its lifetime.
+-     *
+-     * Remember that gallium gets to choose the interface it needs, and the
+-     * window systems must then implement that interface (rather than the
+-     * other way around...).
+-     *************************************************************************/
+-
+-    /**
+-     * Create a buffer object.
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param size      The size to allocate.
+-     * \param alignment An alignment of the buffer in memory.
+-     * \param use_reusable_pool Whether the cache buffer manager should be used.
+-     * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
+-     * \return          The created buffer object.
+-     */
+-    struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
+-                                       unsigned size,
+-                                       unsigned alignment,
+-                                       boolean use_reusable_pool,
+-                                       enum radeon_bo_domain domain,
+-                                       enum radeon_bo_flag flags);
+-
+-    struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
+-            struct pb_buffer *buf);
+-
+-    /**
+-     * Map the entire data store of a buffer object into the client's address
+-     * space.
+-     *
+-     * \param buf       A winsys buffer object to map.
+-     * \param cs        A command stream to flush if the buffer is referenced by it.
+-     * \param usage     A bitmask of the PIPE_TRANSFER_* flags.
+-     * \return          The pointer at the beginning of the buffer.
+-     */
+-    void *(*buffer_map)(struct radeon_winsys_cs_handle *buf,
+-                        struct radeon_winsys_cs *cs,
+-                        enum pipe_transfer_usage usage);
+-
+-    /**
+-     * Unmap a buffer object from the client's address space.
+-     *
+-     * \param buf       A winsys buffer object to unmap.
+-     */
+-    void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
+-
+-    /**
+-     * Return TRUE if a buffer object is being used by the GPU.
+-     *
+-     * \param buf       A winsys buffer object.
+-     * \param usage     Only check whether the buffer is busy for the given usage.
+-     */
+-    boolean (*buffer_is_busy)(struct pb_buffer *buf,
+-                              enum radeon_bo_usage usage);
+-
+-    /**
+-     * Wait for a buffer object until it is not used by a GPU. This is
+-     * equivalent to a fence placed after the last command using the buffer,
+-     * and synchronizing to the fence.
+-     *
+-     * \param buf       A winsys buffer object to wait for.
+-     * \param usage     Only wait until the buffer is idle for the given usage,
+-     *                  but may still be busy for some other usage.
+-     */
+-    void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
+-
+-    /**
+-     * Return tiling flags describing a memory layout of a buffer object.
+-     *
+-     * \param buf       A winsys buffer object to get the flags from.
+-     * \param macrotile A pointer to the return value of the microtile flag.
+-     * \param microtile A pointer to the return value of the macrotile flag.
+-     *
+-     * \note microtile and macrotile are not bitmasks!
+-     */
+-    void (*buffer_get_tiling)(struct pb_buffer *buf,
+-                              enum radeon_bo_layout *microtile,
+-                              enum radeon_bo_layout *macrotile,
+-                              unsigned *bankw, unsigned *bankh,
+-                              unsigned *tile_split,
+-                              unsigned *stencil_tile_split,
+-                              unsigned *mtilea,
+-                              bool *scanout);
+-
+-    /**
+-     * Set tiling flags describing a memory layout of a buffer object.
+-     *
+-     * \param buf       A winsys buffer object to set the flags for.
+-     * \param cs        A command stream to flush if the buffer is referenced by it.
+-     * \param macrotile A macrotile flag.
+-     * \param microtile A microtile flag.
+-     * \param stride    A stride of the buffer in bytes, for texturing.
+-     *
+-     * \note microtile and macrotile are not bitmasks!
+-     */
+-    void (*buffer_set_tiling)(struct pb_buffer *buf,
+-                              struct radeon_winsys_cs *rcs,
+-                              enum radeon_bo_layout microtile,
+-                              enum radeon_bo_layout macrotile,
+-                              unsigned bankw, unsigned bankh,
+-                              unsigned tile_split,
+-                              unsigned stencil_tile_split,
+-                              unsigned mtilea,
+-                              unsigned stride,
+-                              bool scanout);
+-
+-    /**
+-     * Get a winsys buffer from a winsys handle. The internal structure
+-     * of the handle is platform-specific and only a winsys should access it.
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param whandle   A winsys handle pointer as was received from a state
+-     *                  tracker.
+-     * \param stride    The returned buffer stride in bytes.
+-     */
+-    struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws,
+-                                            struct winsys_handle *whandle,
+-                                            unsigned *stride);
+-
+-    /**
+-     * Get a winsys buffer from a user pointer. The resulting buffer can't
+-     * be exported. Both pointer and size must be page aligned.
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param pointer   User pointer to turn into a buffer object.
+-     * \param Size      Size in bytes for the new buffer.
+-     */
+-    struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws,
+-                                         void *pointer, unsigned size);
+-
+-    /**
+-     * Get a winsys handle from a winsys buffer. The internal structure
+-     * of the handle is platform-specific and only a winsys should access it.
+-     *
+-     * \param buf       A winsys buffer object to get the handle from.
+-     * \param whandle   A winsys handle pointer.
+-     * \param stride    A stride of the buffer in bytes, for texturing.
+-     * \return          TRUE on success.
+-     */
+-    boolean (*buffer_get_handle)(struct pb_buffer *buf,
+-                                 unsigned stride,
+-                                 struct winsys_handle *whandle);
+-
+-    /**
+-     * Return the virtual address of a buffer.
+-     *
+-     * \param buf       A winsys buffer object
+-     * \return          virtual address
+-     */
+-    uint64_t (*buffer_get_virtual_address)(struct radeon_winsys_cs_handle *buf);
+-
+-    /**
+-     * Query the initial placement of the buffer from the kernel driver.
+-     */
+-    enum radeon_bo_domain (*buffer_get_initial_domain)(struct radeon_winsys_cs_handle *buf);
+-
+-    /**************************************************************************
+-     * Command submission.
+-     *
+-     * Each pipe context should create its own command stream and submit
+-     * commands independently of other contexts.
+-     *************************************************************************/
+-
+-    /**
+-     * Create a command stream.
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param ring_type The ring type (GFX, DMA, UVD)
+-     * \param flush     Flush callback function associated with the command stream.
+-     * \param user      User pointer that will be passed to the flush callback.
+-     * \param trace_buf Trace buffer when tracing is enabled
+-     */
+-    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
+-                                          enum ring_type ring_type,
+-                                          void (*flush)(void *ctx, unsigned flags,
+-							struct pipe_fence_handle **fence),
+-                                          void *flush_ctx,
+-                                          struct radeon_winsys_cs_handle *trace_buf);
+-
+-    /**
+-     * Destroy a command stream.
+-     *
+-     * \param cs        A command stream to destroy.
+-     */
+-    void (*cs_destroy)(struct radeon_winsys_cs *cs);
+-
+-    /**
+-     * Add a new buffer relocation. Every relocation must first be added
+-     * before it can be written.
+-     *
+-     * \param cs  A command stream to add buffer for validation against.
+-     * \param buf A winsys buffer to validate.
+-     * \param usage   Whether the buffer is used for read and/or write.
+-     * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
+-     * \param priority  A higher number means a greater chance of being
+-     *                  placed in the requested domain. 15 is the maximum.
+-     * \return Relocation index.
+-     */
+-    unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+-                             struct radeon_winsys_cs_handle *buf,
+-                             enum radeon_bo_usage usage,
+-                             enum radeon_bo_domain domain,
+-                             enum radeon_bo_priority priority);
+-
+-    /**
+-     * Return the index of an already-added buffer.
+-     *
+-     * \param cs        Command stream
+-     * \param buf       Buffer
+-     * \return          The buffer index, or -1 if the buffer has not been added.
+-     */
+-    int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
+-                        struct radeon_winsys_cs_handle *buf);
+-
+-    /**
+-     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+-     * added so far. If the validation fails, all the relocations which have
+-     * been added since the last call of cs_validate will be removed and
+-     * the CS will be flushed (provided there are still any relocations).
+-     *
+-     * \param cs        A command stream to validate.
+-     */
+-    boolean (*cs_validate)(struct radeon_winsys_cs *cs);
+-
+-    /**
+-     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+-     * added so far.
+-     *
+-     * \param cs        A command stream to validate.
+-     * \param vram      VRAM memory size pending to be use
+-     * \param gtt       GTT memory size pending to be use
+-     */
+-    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
+-
+-    /**
+-     * Flush a command stream.
+-     *
+-     * \param cs          A command stream to flush.
+-     * \param flags,      RADEON_FLUSH_ASYNC or 0.
+-     * \param fence       Pointer to a fence. If non-NULL, a fence is inserted
+-     *                    after the CS and is returned through this parameter.
+-     * \param cs_trace_id A unique identifier of the cs, used for tracing.
+-     */
+-    void (*cs_flush)(struct radeon_winsys_cs *cs,
+-                     unsigned flags,
+-                     struct pipe_fence_handle **fence,
+-                     uint32_t cs_trace_id);
+-
+-    /**
+-     * Return TRUE if a buffer is referenced by a command stream.
+-     *
+-     * \param cs        A command stream.
+-     * \param buf       A winsys buffer.
+-     */
+-    boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
+-                                       struct radeon_winsys_cs_handle *buf,
+-                                       enum radeon_bo_usage usage);
+-
+-    /**
+-     * Request access to a feature for a command stream.
+-     *
+-     * \param cs        A command stream.
+-     * \param fid       Feature ID, one of RADEON_FID_*
+-     * \param enable    Whether to enable or disable the feature.
+-     */
+-    boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
+-                                  enum radeon_feature_id fid,
+-                                  boolean enable);
+-     /**
+-      * Make sure all asynchronous flush of the cs have completed
+-      *
+-      * \param cs        A command stream.
+-      */
+-    void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
+-
+-    /**
+-     * Wait for the fence and return true if the fence has been signalled.
+-     * The timeout of 0 will only return the status.
+-     * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
+-     * is signalled.
+-     */
+-    bool (*fence_wait)(struct radeon_winsys *ws,
+-                       struct pipe_fence_handle *fence,
+-                       uint64_t timeout);
+-
+-    /**
+-     * Reference counting for fences.
+-     */
+-    void (*fence_reference)(struct pipe_fence_handle **dst,
+-                            struct pipe_fence_handle *src);
+-
+-    /**
+-     * Initialize surface
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param surf      Surface structure ptr
+-     */
+-    int (*surface_init)(struct radeon_winsys *ws,
+-                        struct radeon_surface *surf);
+-
+-    /**
+-     * Find best values for a surface
+-     *
+-     * \param ws        The winsys this function is called from.
+-     * \param surf      Surface structure ptr
+-     */
+-    int (*surface_best)(struct radeon_winsys *ws,
+-                        struct radeon_surface *surf);
+-
+-    uint64_t (*query_value)(struct radeon_winsys *ws,
+-                            enum radeon_value_id value);
+-};
+-
+-
+-static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+-{
+-    cs->buf[cs->cdw++] = value;
+-}
+-
+-static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
+-				     const uint32_t *values, unsigned count)
+-{
+-    memcpy(cs->buf+cs->cdw, values, count * 4);
+-    cs->cdw += count;
+-}
+-
+-#endif
+diff --git a/src/gallium/winsys/radeon/radeon_winsys.h b/src/gallium/winsys/radeon/radeon_winsys.h
+new file mode 100644
+index 0000000..a8cc60a
+--- /dev/null
++++ b/src/gallium/winsys/radeon/radeon_winsys.h
+@@ -0,0 +1,604 @@
++/*
++ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
++ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * on the rights to use, copy, modify, merge, publish, distribute, sub
++ * license, and/or sell copies of the Software, and to permit persons to whom
++ * the Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
++
++#ifndef RADEON_WINSYS_H
++#define RADEON_WINSYS_H
++
++/* The public winsys interface header for the radeon driver. */
++
++/* R300 features in DRM.
++ *
++ * 2.6.0:
++ * - Hyper-Z
++ * - GB_Z_PEQ_CONFIG on rv350->r4xx
++ * - R500 FG_ALPHA_VALUE
++ *
++ * 2.8.0:
++ * - R500 US_FORMAT regs
++ * - R500 ARGB2101010 colorbuffer
++ * - CMask and AA regs
++ * - R16F/RG16F
++ */
++
++#include "pipebuffer/pb_buffer.h"
++#include "radeon_surface.h"
++
++#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
++
++#define RADEON_FLUSH_ASYNC		(1 << 0)
++#define RADEON_FLUSH_KEEP_TILING_FLAGS	(1 << 1) /* needs DRM 2.12.0 */
++#define RADEON_FLUSH_COMPUTE		(1 << 2)
++#define RADEON_FLUSH_END_OF_FRAME       (1 << 3)
++
++/* Tiling flags. */
++enum radeon_bo_layout {
++    RADEON_LAYOUT_LINEAR = 0,
++    RADEON_LAYOUT_TILED,
++    RADEON_LAYOUT_SQUARETILED,
++
++    RADEON_LAYOUT_UNKNOWN
++};
++
++enum radeon_bo_domain { /* bitfield */
++    RADEON_DOMAIN_GTT  = 2,
++    RADEON_DOMAIN_VRAM = 4,
++    RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
++};
++
++enum radeon_bo_flag { /* bitfield */
++    RADEON_FLAG_GTT_WC =        (1 << 0),
++    RADEON_FLAG_CPU_ACCESS =    (1 << 1),
++    RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
++};
++
++enum radeon_bo_usage { /* bitfield */
++    RADEON_USAGE_READ = 2,
++    RADEON_USAGE_WRITE = 4,
++    RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
++};
++
++enum radeon_family {
++    CHIP_UNKNOWN = 0,
++    CHIP_R300, /* R3xx-based cores. */
++    CHIP_R350,
++    CHIP_RV350,
++    CHIP_RV370,
++    CHIP_RV380,
++    CHIP_RS400,
++    CHIP_RC410,
++    CHIP_RS480,
++    CHIP_R420,     /* R4xx-based cores. */
++    CHIP_R423,
++    CHIP_R430,
++    CHIP_R480,
++    CHIP_R481,
++    CHIP_RV410,
++    CHIP_RS600,
++    CHIP_RS690,
++    CHIP_RS740,
++    CHIP_RV515,    /* R5xx-based cores. */
++    CHIP_R520,
++    CHIP_RV530,
++    CHIP_R580,
++    CHIP_RV560,
++    CHIP_RV570,
++    CHIP_R600,
++    CHIP_RV610,
++    CHIP_RV630,
++    CHIP_RV670,
++    CHIP_RV620,
++    CHIP_RV635,
++    CHIP_RS780,
++    CHIP_RS880,
++    CHIP_RV770,
++    CHIP_RV730,
++    CHIP_RV710,
++    CHIP_RV740,
++    CHIP_CEDAR,
++    CHIP_REDWOOD,
++    CHIP_JUNIPER,
++    CHIP_CYPRESS,
++    CHIP_HEMLOCK,
++    CHIP_PALM,
++    CHIP_SUMO,
++    CHIP_SUMO2,
++    CHIP_BARTS,
++    CHIP_TURKS,
++    CHIP_CAICOS,
++    CHIP_CAYMAN,
++    CHIP_ARUBA,
++    CHIP_TAHITI,
++    CHIP_PITCAIRN,
++    CHIP_VERDE,
++    CHIP_OLAND,
++    CHIP_HAINAN,
++    CHIP_BONAIRE,
++    CHIP_KAVERI,
++    CHIP_KABINI,
++    CHIP_HAWAII,
++    CHIP_MULLINS,
++    CHIP_LAST,
++};
++
++enum chip_class {
++    CLASS_UNKNOWN = 0,
++    R300,
++    R400,
++    R500,
++    R600,
++    R700,
++    EVERGREEN,
++    CAYMAN,
++    SI,
++    CIK,
++};
++
++enum ring_type {
++    RING_GFX = 0,
++    RING_DMA,
++    RING_UVD,
++    RING_VCE,
++    RING_LAST,
++};
++
++enum radeon_value_id {
++    RADEON_REQUESTED_VRAM_MEMORY,
++    RADEON_REQUESTED_GTT_MEMORY,
++    RADEON_BUFFER_WAIT_TIME_NS,
++    RADEON_TIMESTAMP,
++    RADEON_NUM_CS_FLUSHES,
++    RADEON_NUM_BYTES_MOVED,
++    RADEON_VRAM_USAGE,
++    RADEON_GTT_USAGE
++};
++
++enum radeon_bo_priority {
++    RADEON_PRIO_MIN,
++    RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
++    RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
++    RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
++    RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
++    RADEON_PRIO_COLOR_BUFFER,
++    RADEON_PRIO_DEPTH_BUFFER,
++    RADEON_PRIO_SHADER_TEXTURE_MSAA,
++    RADEON_PRIO_COLOR_BUFFER_MSAA,
++    RADEON_PRIO_DEPTH_BUFFER_MSAA,
++    RADEON_PRIO_COLOR_META,
++    RADEON_PRIO_DEPTH_META,
++    RADEON_PRIO_MAX /* must be <= 15 */
++};
++
++struct winsys_handle;
++struct radeon_winsys_cs_handle;
++
++struct radeon_winsys_cs {
++    unsigned                    cdw;  /* Number of used dwords. */
++    uint32_t                    *buf; /* The command buffer. */
++    enum ring_type              ring_type;
++};
++
++struct radeon_info {
++    uint32_t                    pci_id;
++    enum radeon_family          family;
++    enum chip_class             chip_class;
++    uint64_t                    gart_size;
++    uint64_t                    vram_size;
++    uint32_t                    max_sclk;
++    uint32_t                    max_compute_units;
++    uint32_t                    max_se;
++    uint32_t                    max_sh_per_se;
++
++    uint32_t                    drm_major; /* version */
++    uint32_t                    drm_minor;
++    uint32_t                    drm_patchlevel;
++
++    boolean                     has_uvd;
++    uint32_t                    vce_fw_version;
++    boolean                     has_userptr;
++
++    uint32_t                    r300_num_gb_pipes;
++    uint32_t                    r300_num_z_pipes;
++
++    uint32_t                    r600_num_backends;
++    uint32_t                    r600_clock_crystal_freq;
++    uint32_t                    r600_tiling_config;
++    uint32_t                    r600_num_tile_pipes;
++    uint32_t                    r600_max_pipes;
++    boolean                     r600_virtual_address;
++    boolean                     r600_has_dma;
++
++    uint32_t                    r600_backend_map;
++    boolean                     r600_backend_map_valid;
++
++    boolean                     si_tile_mode_array_valid;
++    uint32_t                    si_tile_mode_array[32];
++    uint32_t                    si_backend_enabled_mask;
++
++    boolean                     cik_macrotile_mode_array_valid;
++    uint32_t                    cik_macrotile_mode_array[16];
++};
++
++enum radeon_feature_id {
++    RADEON_FID_R300_HYPERZ_ACCESS,     /* ZMask + HiZ */
++    RADEON_FID_R300_CMASK_ACCESS,
++};
++
++struct radeon_winsys {
++    /**
++     * The screen object this winsys was created for
++     */
++    struct pipe_screen *screen;
++
++    /**
++     * Decrement the winsys reference count.
++     *
++     * \param ws  The winsys this function is called for.
++     * \return    True if the winsys and screen should be destroyed.
++     */
++    bool (*unref)(struct radeon_winsys *ws);
++
++    /**
++     * Destroy this winsys.
++     *
++     * \param ws        The winsys this function is called from.
++     */
++    void (*destroy)(struct radeon_winsys *ws);
++
++    /**
++     * Query an info structure from winsys.
++     *
++     * \param ws        The winsys this function is called from.
++     * \param info      Return structure
++     */
++    void (*query_info)(struct radeon_winsys *ws,
++                       struct radeon_info *info);
++
++    /**************************************************************************
++     * Buffer management. Buffer attributes are mostly fixed over its lifetime.
++     *
++     * Remember that gallium gets to choose the interface it needs, and the
++     * window systems must then implement that interface (rather than the
++     * other way around...).
++     *************************************************************************/
++
++    /**
++     * Create a buffer object.
++     *
++     * \param ws        The winsys this function is called from.
++     * \param size      The size to allocate.
++     * \param alignment An alignment of the buffer in memory.
++     * \param use_reusable_pool Whether the cache buffer manager should be used.
++     * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
++     * \return          The created buffer object.
++     */
++    struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
++                                       unsigned size,
++                                       unsigned alignment,
++                                       boolean use_reusable_pool,
++                                       enum radeon_bo_domain domain,
++                                       enum radeon_bo_flag flags);
++
++    struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
++            struct pb_buffer *buf);
++
++    /**
++     * Map the entire data store of a buffer object into the client's address
++     * space.
++     *
++     * \param buf       A winsys buffer object to map.
++     * \param cs        A command stream to flush if the buffer is referenced by it.
++     * \param usage     A bitmask of the PIPE_TRANSFER_* flags.
++     * \return          The pointer at the beginning of the buffer.
++     */
++    void *(*buffer_map)(struct radeon_winsys_cs_handle *buf,
++                        struct radeon_winsys_cs *cs,
++                        enum pipe_transfer_usage usage);
++
++    /**
++     * Unmap a buffer object from the client's address space.
++     *
++     * \param buf       A winsys buffer object to unmap.
++     */
++    void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
++
++    /**
++     * Return TRUE if a buffer object is being used by the GPU.
++     *
++     * \param buf       A winsys buffer object.
++     * \param usage     Only check whether the buffer is busy for the given usage.
++     */
++    boolean (*buffer_is_busy)(struct pb_buffer *buf,
++                              enum radeon_bo_usage usage);
++
++    /**
++     * Wait for a buffer object until it is not used by a GPU. This is
++     * equivalent to a fence placed after the last command using the buffer,
++     * and synchronizing to the fence.
++     *
++     * \param buf       A winsys buffer object to wait for.
++     * \param usage     Only wait until the buffer is idle for the given usage,
++     *                  but may still be busy for some other usage.
++     */
++    void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
++
++    /**
++     * Return tiling flags describing a memory layout of a buffer object.
++     *
++     * \param buf       A winsys buffer object to get the flags from.
++     * \param microtile A pointer to the return value of the microtile flag.
++     * \param macrotile A pointer to the return value of the macrotile flag.
++     *
++     * \note microtile and macrotile are not bitmasks!
++     */
++    void (*buffer_get_tiling)(struct pb_buffer *buf,
++                              enum radeon_bo_layout *microtile,
++                              enum radeon_bo_layout *macrotile,
++                              unsigned *bankw, unsigned *bankh,
++                              unsigned *tile_split,
++                              unsigned *stencil_tile_split,
++                              unsigned *mtilea,
++                              bool *scanout);
++
++    /**
++     * Set tiling flags describing a memory layout of a buffer object.
++     *
++     * \param buf       A winsys buffer object to set the flags for.
++     * \param rcs       A command stream to flush if the buffer is referenced by it.
++     * \param macrotile A macrotile flag.
++     * \param microtile A microtile flag.
++     * \param stride    A stride of the buffer in bytes, for texturing.
++     *
++     * \note microtile and macrotile are not bitmasks!
++     */
++    void (*buffer_set_tiling)(struct pb_buffer *buf,
++                              struct radeon_winsys_cs *rcs,
++                              enum radeon_bo_layout microtile,
++                              enum radeon_bo_layout macrotile,
++                              unsigned bankw, unsigned bankh,
++                              unsigned tile_split,
++                              unsigned stencil_tile_split,
++                              unsigned mtilea,
++                              unsigned stride,
++                              bool scanout);
++
++    /**
++     * Get a winsys buffer from a winsys handle. The internal structure
++     * of the handle is platform-specific and only a winsys should access it.
++     *
++     * \param ws        The winsys this function is called from.
++     * \param whandle   A winsys handle pointer as was received from a state
++     *                  tracker.
++     * \param stride    The returned buffer stride in bytes.
++     */
++    struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws,
++                                            struct winsys_handle *whandle,
++                                            unsigned *stride);
++
++    /**
++     * Get a winsys buffer from a user pointer. The resulting buffer can't
++     * be exported. Both pointer and size must be page aligned.
++     *
++     * \param ws        The winsys this function is called from.
++     * \param pointer   User pointer to turn into a buffer object.
++     * \param size      Size in bytes for the new buffer.
++     */
++    struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws,
++                                         void *pointer, unsigned size);
++
++    /**
++     * Get a winsys handle from a winsys buffer. The internal structure
++     * of the handle is platform-specific and only a winsys should access it.
++     *
++     * \param buf       A winsys buffer object to get the handle from.
++     * \param whandle   A winsys handle pointer.
++     * \param stride    A stride of the buffer in bytes, for texturing.
++     * \return          TRUE on success.
++     */
++    boolean (*buffer_get_handle)(struct pb_buffer *buf,
++                                 unsigned stride,
++                                 struct winsys_handle *whandle);
++
++    /**
++     * Return the virtual address of a buffer.
++     *
++     * \param buf       A winsys buffer object
++     * \return          virtual address
++     */
++    uint64_t (*buffer_get_virtual_address)(struct radeon_winsys_cs_handle *buf);
++
++    /**
++     * Query the initial placement of the buffer from the kernel driver.
++     */
++    enum radeon_bo_domain (*buffer_get_initial_domain)(struct radeon_winsys_cs_handle *buf);
++
++    /**************************************************************************
++     * Command submission.
++     *
++     * Each pipe context should create its own command stream and submit
++     * commands independently of other contexts.
++     *************************************************************************/
++
++    /**
++     * Create a command stream.
++     *
++     * \param ws        The winsys this function is called from.
++     * \param ring_type The ring type (GFX, DMA, UVD)
++     * \param flush     Flush callback function associated with the command stream.
++     * \param user      User pointer that will be passed to the flush callback.
++     * \param trace_buf Trace buffer when tracing is enabled
++     */
++    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
++                                          enum ring_type ring_type,
++                                          void (*flush)(void *ctx, unsigned flags,
++							struct pipe_fence_handle **fence),
++                                          void *flush_ctx,
++                                          struct radeon_winsys_cs_handle *trace_buf);
++
++    /**
++     * Destroy a command stream.
++     *
++     * \param cs        A command stream to destroy.
++     */
++    void (*cs_destroy)(struct radeon_winsys_cs *cs);
++
++    /**
++     * Add a new buffer relocation. Every relocation must first be added
++     * before it can be written.
++     *
++     * \param cs  A command stream to add buffer for validation against.
++     * \param buf A winsys buffer to validate.
++     * \param usage   Whether the buffer is used for read and/or write.
++     * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
++     * \param priority  A higher number means a greater chance of being
++     *                  placed in the requested domain. 15 is the maximum.
++     * \return Relocation index.
++     */
++    unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
++                             struct radeon_winsys_cs_handle *buf,
++                             enum radeon_bo_usage usage,
++                             enum radeon_bo_domain domain,
++                             enum radeon_bo_priority priority);
++
++    /**
++     * Return the index of an already-added buffer.
++     *
++     * \param cs        Command stream
++     * \param buf       Buffer
++     * \return          The buffer index, or -1 if the buffer has not been added.
++     */
++    int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
++                        struct radeon_winsys_cs_handle *buf);
++
++    /**
++     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
++     * added so far. If the validation fails, all the relocations which have
++     * been added since the last call of cs_validate will be removed and
++     * the CS will be flushed (provided there are still any relocations).
++     *
++     * \param cs        A command stream to validate.
++     */
++    boolean (*cs_validate)(struct radeon_winsys_cs *cs);
++
++    /**
++     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
++     * added so far.
++     *
++     * \param cs        A command stream to validate.
++     * \param vram      VRAM memory size pending to be used
++     * \param gtt       GTT memory size pending to be used
++     */
++    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
++
++    /**
++     * Flush a command stream.
++     *
++     * \param cs          A command stream to flush.
++     * \param flags       RADEON_FLUSH_ASYNC or 0.
++     * \param fence       Pointer to a fence. If non-NULL, a fence is inserted
++     *                    after the CS and is returned through this parameter.
++     * \param cs_trace_id A unique identifier of the cs, used for tracing.
++     */
++    void (*cs_flush)(struct radeon_winsys_cs *cs,
++                     unsigned flags,
++                     struct pipe_fence_handle **fence,
++                     uint32_t cs_trace_id);
++
++    /**
++     * Return TRUE if a buffer is referenced by a command stream.
++     *
++     * \param cs        A command stream.
++     * \param buf       A winsys buffer.
++     */
++    boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
++                                       struct radeon_winsys_cs_handle *buf,
++                                       enum radeon_bo_usage usage);
++
++    /**
++     * Request access to a feature for a command stream.
++     *
++     * \param cs        A command stream.
++     * \param fid       Feature ID, one of RADEON_FID_*
++     * \param enable    Whether to enable or disable the feature.
++     */
++    boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
++                                  enum radeon_feature_id fid,
++                                  boolean enable);
++     /**
++      * Make sure all asynchronous flushes of the cs have completed.
++      *
++      * \param cs        A command stream.
++      */
++    void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
++
++    /**
++     * Wait for the fence and return true if the fence has been signalled.
++     * The timeout of 0 will only return the status.
++     * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
++     * is signalled.
++     */
++    bool (*fence_wait)(struct radeon_winsys *ws,
++                       struct pipe_fence_handle *fence,
++                       uint64_t timeout);
++
++    /**
++     * Reference counting for fences.
++     */
++    void (*fence_reference)(struct pipe_fence_handle **dst,
++                            struct pipe_fence_handle *src);
++
++    /**
++     * Initialize surface
++     *
++     * \param ws        The winsys this function is called from.
++     * \param surf      Surface structure ptr
++     */
++    int (*surface_init)(struct radeon_winsys *ws,
++                        struct radeon_surface *surf);
++
++    /**
++     * Find best values for a surface
++     *
++     * \param ws        The winsys this function is called from.
++     * \param surf      Surface structure ptr
++     */
++    int (*surface_best)(struct radeon_winsys *ws,
++                        struct radeon_surface *surf);
++
++    uint64_t (*query_value)(struct radeon_winsys *ws,
++                            enum radeon_value_id value);
++};
++
++
++static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
++{
++    cs->buf[cs->cdw++] = value;
++}
++
++static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
++				     const uint32_t *values, unsigned count)
++{
++    memcpy(cs->buf+cs->cdw, values, count * 4);
++    cs->cdw += count;
++}
++
++#endif
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0007-winsys-radeon-add-a-private-interface-for-radeon_sur.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0007-winsys-radeon-add-a-private-interface-for-radeon_sur.patch
new file mode 100644
index 00000000..094c76c0
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0007-winsys-radeon-add-a-private-interface-for-radeon_sur.patch
@@ -0,0 +1,659 @@
+From 96bcd3e235a5d326f455944e8393e254925b58bc Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 22:53:04 +0200
+Subject: [PATCH 07/29] winsys/radeon: add a private interface for
+ radeon_surface
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/r600/evergreen_state.c         |   6 +-
+ src/gallium/drivers/r600/r600_uvd.c                |   2 +-
+ src/gallium/drivers/radeon/r600_pipe_common.h      |   2 +-
+ src/gallium/drivers/radeon/r600_texture.c          |  12 +-
+ src/gallium/drivers/radeon/radeon_uvd.c            |   6 +-
+ src/gallium/drivers/radeon/radeon_uvd.h            |   4 +-
+ src/gallium/drivers/radeon/radeon_vce.c            |   2 +-
+ src/gallium/drivers/radeon/radeon_vce.h            |   6 +-
+ src/gallium/drivers/radeon/radeon_video.c          |   2 +-
+ src/gallium/drivers/radeon/radeon_video.h          |   2 +-
+ src/gallium/drivers/radeonsi/si_state.c            |   4 +-
+ src/gallium/drivers/radeonsi/si_uvd.c              |   4 +-
+ src/gallium/winsys/radeon/drm/Makefile.sources     |   1 +
+ src/gallium/winsys/radeon/drm/radeon_drm_surface.c | 180 +++++++++++++++++++++
+ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  |  20 +--
+ src/gallium/winsys/radeon/drm/radeon_drm_winsys.h  |   1 +
+ src/gallium/winsys/radeon/radeon_winsys.h          |  79 ++++++++-
+ 17 files changed, 286 insertions(+), 47 deletions(-)
+ create mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_surface.c
+
+diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
+index edd886b..8951ab0 100644
+--- a/src/gallium/drivers/r600/evergreen_state.c
++++ b/src/gallium/drivers/r600/evergreen_state.c
+@@ -664,7 +664,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
+ 	unsigned height, depth, width;
+ 	unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh;
+ 	enum pipe_format pipe_format = state->format;
+-	struct radeon_surface_level *surflevel;
++	struct radeon_surf_level *surflevel;
+ 	unsigned base_level, first_level, last_level;
+ 	uint64_t va;
+ 
+@@ -918,7 +918,7 @@ static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_
+ /**
+  * This function intializes the CB* register values for RATs.  It is meant
+  * to be used for 1D aligned buffers that do not have an associated
+- * radeon_surface.
++ * radeon_surf.
+  */
+ void evergreen_init_color_surface_rat(struct r600_context *rctx,
+ 					struct r600_surface *surf)
+@@ -1163,7 +1163,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
+ 	struct r600_screen *rscreen = rctx->screen;
+ 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
+ 	unsigned level = surf->base.u.tex.level;
+-	struct radeon_surface_level *levelinfo = &rtex->surface.level[level];
++	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
+ 	uint64_t offset;
+ 	unsigned format, array_mode;
+ 	unsigned macro_aspect, tile_split, bankh, bankw, nbanks;
+diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c
+index ee5288f..357e901 100644
+--- a/src/gallium/drivers/r600/r600_uvd.c
++++ b/src/gallium/drivers/r600/r600_uvd.c
+@@ -57,7 +57,7 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
+ {
+ 	struct r600_context *ctx = (struct r600_context *)pipe;
+ 	struct r600_texture *resources[VL_NUM_COMPONENTS] = {};
+-	struct radeon_surface* surfaces[VL_NUM_COMPONENTS] = {};
++	struct radeon_surf* surfaces[VL_NUM_COMPONENTS] = {};
+ 	struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {};
+ 	const enum pipe_format *resource_formats;
+ 	struct pipe_video_buffer template;
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
+index b7df001..bdb4541 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.h
++++ b/src/gallium/drivers/radeon/r600_pipe_common.h
+@@ -195,7 +195,7 @@ struct r600_texture {
+ 	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
+ 	struct r600_texture		*flushed_depth_texture;
+ 	boolean				is_flushing_texture;
+-	struct radeon_surface		surface;
++	struct radeon_surf		surface;
+ 
+ 	/* Colorbuffer compression and fast clear. */
+ 	struct r600_fmask_info		fmask;
+diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
+index ab8ce7b..dc510c9 100644
+--- a/src/gallium/drivers/radeon/r600_texture.c
++++ b/src/gallium/drivers/radeon/r600_texture.c
+@@ -119,7 +119,7 @@ static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned leve
+ }
+ 
+ static int r600_init_surface(struct r600_common_screen *rscreen,
+-			     struct radeon_surface *surface,
++			     struct radeon_surf *surface,
+ 			     const struct pipe_resource *ptex,
+ 			     unsigned array_mode,
+ 			     bool is_flushed_depth)
+@@ -234,7 +234,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
+ {
+ 	struct r600_texture *rtex = (struct r600_texture*)ptex;
+ 	struct r600_resource *resource = &rtex->resource;
+-	struct radeon_surface *surface = &rtex->surface;
++	struct radeon_surf *surface = &rtex->surface;
+ 	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ 
+ 	rscreen->ws->buffer_set_tiling(resource->buf,
+@@ -280,7 +280,7 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
+ 				 struct r600_fmask_info *out)
+ {
+ 	/* FMASK is allocated like an ordinary texture. */
+-	struct radeon_surface fmask = rtex->surface;
++	struct radeon_surf fmask = rtex->surface;
+ 
+ 	memset(out, 0, sizeof(*out));
+ 
+@@ -570,7 +570,7 @@ r600_texture_create_object(struct pipe_screen *screen,
+ 			   const struct pipe_resource *base,
+ 			   unsigned pitch_in_bytes_override,
+ 			   struct pb_buffer *buf,
+-			   struct radeon_surface *surface)
++			   struct radeon_surf *surface)
+ {
+ 	struct r600_texture *rtex;
+ 	struct r600_resource *resource;
+@@ -764,7 +764,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+ 					  const struct pipe_resource *templ)
+ {
+ 	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+-	struct radeon_surface surface = {0};
++	struct radeon_surf surface = {0};
+ 	int r;
+ 
+ 	r = r600_init_surface(rscreen, &surface, templ,
+@@ -790,7 +790,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
+ 	unsigned stride = 0;
+ 	unsigned array_mode;
+ 	enum radeon_bo_layout micro, macro;
+-	struct radeon_surface surface;
++	struct radeon_surf surface;
+ 	bool scanout;
+ 	int r;
+ 
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
+index 4d4b54b..be58d0b 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.c
++++ b/src/gallium/drivers/radeon/radeon_uvd.c
+@@ -870,7 +870,7 @@ error:
+ }
+ 
+ /* calculate top/bottom offset */
+-static unsigned texture_offset(struct radeon_surface *surface, unsigned layer)
++static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
+ {
+ 	return surface->level[0].offset +
+ 		layer * surface->level[0].slice_size;
+@@ -905,8 +905,8 @@ static unsigned bank_wh(unsigned bankwh)
+ /**
+  * fill decoding target field from the luma and chroma surfaces
+  */
+-void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma,
+-			  struct radeon_surface *chroma)
++void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
++			  struct radeon_surf *chroma)
+ {
+ 	msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
+ 	switch (luma->level[0].mode) {
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
+index 41a6fb4..7442865 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.h
++++ b/src/gallium/drivers/radeon/radeon_uvd.h
+@@ -353,6 +353,6 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 					     ruvd_set_dtb set_dtb);
+ 
+ /* fill decoding target field from the luma and chroma surfaces */
+-void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma,
+-			  struct radeon_surface *chroma);
++void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
++			  struct radeon_surf *chroma);
+ #endif
+diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
+index 5f710e6..e220f40 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.c
++++ b/src/gallium/drivers/radeon/radeon_vce.c
+@@ -337,7 +337,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ 	struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
+ 	struct rvce_encoder *enc;
+ 	struct pipe_video_buffer *tmp_buf, templat = {};
+-	struct radeon_surface *tmp_surf;
++	struct radeon_surf *tmp_surf;
+ 	unsigned cpb_size;
+ 
+ 	if (!rscreen->info.vce_fw_version) {
+diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
+index 7f0cd1f..5c6317a 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.h
++++ b/src/gallium/drivers/radeon/radeon_vce.h
+@@ -50,7 +50,7 @@ struct r600_common_screen;
+ /* driver dependent callback */
+ typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
+ 				struct radeon_winsys_cs_handle **handle,
+-				struct radeon_surface **surface);
++				struct radeon_surf **surface);
+ 
+ /* Coded picture buffer slot */
+ struct rvce_cpb_slot {
+@@ -88,8 +88,8 @@ struct rvce_encoder {
+ 	rvce_get_buffer			get_buffer;
+ 
+ 	struct radeon_winsys_cs_handle*	handle;
+-	struct radeon_surface*		luma;
+-	struct radeon_surface*		chroma;
++	struct radeon_surf*		luma;
++	struct radeon_surf*		chroma;
+ 
+ 	struct radeon_winsys_cs_handle*	bs_handle;
+ 	unsigned			bs_size;
+diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
+index 6ec10c1..826e076 100644
+--- a/src/gallium/drivers/radeon/radeon_video.c
++++ b/src/gallium/drivers/radeon/radeon_video.c
+@@ -132,7 +132,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
+  */
+ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
+ 			struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+-			struct radeon_surface *surfaces[VL_NUM_COMPONENTS])
++			struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
+ {
+ 	unsigned best_tiling, best_wh, off;
+ 	unsigned size, alignment;
+diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h
+index 6d0ff28..c9ee67f 100644
+--- a/src/gallium/drivers/radeon/radeon_video.h
++++ b/src/gallium/drivers/radeon/radeon_video.h
+@@ -68,7 +68,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
+    sumup their sizes and replace the backend buffers with a single bo */
+ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
+                         struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+-                        struct radeon_surface *surfaces[VL_NUM_COMPONENTS]);
++                        struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
+ 
+ /* returns supported codecs and other parameters */
+ int rvid_get_video_param(struct pipe_screen *screen,
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index dc19d29..7f0fdd5 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -1852,7 +1852,7 @@ static void si_init_depth_surface(struct si_context *sctx,
+ 	struct si_screen *sscreen = sctx->screen;
+ 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
+ 	unsigned level = surf->base.u.tex.level;
+-	struct radeon_surface_level *levelinfo = &rtex->surface.level[level];
++	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
+ 	unsigned format, tile_mode_index, array_mode;
+ 	unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
+ 	uint32_t z_info, s_info, db_depth_info;
+@@ -2258,7 +2258,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
+ 	unsigned char state_swizzle[4], swizzle[4];
+ 	unsigned height, depth, width;
+ 	enum pipe_format pipe_format = state->format;
+-	struct radeon_surface_level *surflevel;
++	struct radeon_surf_level *surflevel;
+ 	int first_non_void;
+ 	uint64_t va;
+ 
+diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c
+index 0ba3b12..2f10f9b 100644
+--- a/src/gallium/drivers/radeonsi/si_uvd.c
++++ b/src/gallium/drivers/radeonsi/si_uvd.c
+@@ -44,7 +44,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
+ {
+ 	struct si_context *ctx = (struct si_context *)pipe;
+ 	struct r600_texture *resources[VL_NUM_COMPONENTS] = {};
+-	struct radeon_surface *surfaces[VL_NUM_COMPONENTS] = {};
++	struct radeon_surf *surfaces[VL_NUM_COMPONENTS] = {};
+ 	struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {};
+ 	const enum pipe_format *resource_formats;
+ 	struct pipe_video_buffer template;
+@@ -136,7 +136,7 @@ static struct radeon_winsys_cs_handle* si_uvd_set_dtb(struct ruvd_msg *msg, stru
+ /* get the radeon resources for VCE */
+ static void si_vce_get_buffer(struct pipe_resource *resource,
+ 			      struct radeon_winsys_cs_handle **handle,
+-			      struct radeon_surface **surface)
++			      struct radeon_surf **surface)
+ {
+ 	struct r600_texture *res = (struct r600_texture *)resource;
+ 
+diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources
+index ced788b..13a71c4 100644
+--- a/src/gallium/winsys/radeon/drm/Makefile.sources
++++ b/src/gallium/winsys/radeon/drm/Makefile.sources
+@@ -5,6 +5,7 @@ C_SOURCES := \
+ 	radeon_drm_cs_dump.c \
+ 	radeon_drm_cs.h \
+ 	radeon_drm_public.h \
++	radeon_drm_surface.c \
+ 	radeon_drm_winsys.c \
+ 	radeon_drm_winsys.h \
+ 	../radeon_winsys.h
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
+new file mode 100644
+index 0000000..29d3467
+--- /dev/null
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
+@@ -0,0 +1,180 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * Authors:
++ *   Marek Olšák <maraeo@gmail.com>
++ */
++
++#include "radeon_drm_winsys.h"
++
++#include <radeon_surface.h>
++/* Copy one level, field by field, from the winsys radeon_surf_level into the radeon_surface_level of <radeon_surface.h>. */
++static void surf_level_winsys_to_drm(struct radeon_surface_level *level_drm,
++                                     const struct radeon_surf_level *level_ws)
++{
++    level_drm->offset = level_ws->offset;
++    level_drm->slice_size = level_ws->slice_size;
++    level_drm->npix_x = level_ws->npix_x;
++    level_drm->npix_y = level_ws->npix_y;
++    level_drm->npix_z = level_ws->npix_z;
++    level_drm->nblk_x = level_ws->nblk_x;
++    level_drm->nblk_y = level_ws->nblk_y;
++    level_drm->nblk_z = level_ws->nblk_z;
++    level_drm->pitch_bytes = level_ws->pitch_bytes;
++    level_drm->mode = level_ws->mode;
++}
++/* Inverse of surf_level_winsys_to_drm: copy one level, field by field, back into the winsys radeon_surf_level. */
++static void surf_level_drm_to_winsys(struct radeon_surf_level *level_ws,
++                                     const struct radeon_surface_level *level_drm)
++{
++    level_ws->offset = level_drm->offset;
++    level_ws->slice_size = level_drm->slice_size;
++    level_ws->npix_x = level_drm->npix_x;
++    level_ws->npix_y = level_drm->npix_y;
++    level_ws->npix_z = level_drm->npix_z;
++    level_ws->nblk_x = level_drm->nblk_x;
++    level_ws->nblk_y = level_drm->nblk_y;
++    level_ws->nblk_z = level_drm->nblk_z;
++    level_ws->pitch_bytes = level_drm->pitch_bytes;
++    level_ws->mode = level_drm->mode;
++}
++/* Fill *surf_drm (struct radeon_surface from <radeon_surface.h>) from the winsys radeon_surf, field by field. */
++static void surf_winsys_to_drm(struct radeon_surface *surf_drm,
++                               const struct radeon_surf *surf_ws)
++{
++    int i;
++
++    memset(surf_drm, 0, sizeof(*surf_drm)); /* any field not assigned below stays zero */
++
++    surf_drm->npix_x = surf_ws->npix_x;
++    surf_drm->npix_y = surf_ws->npix_y;
++    surf_drm->npix_z = surf_ws->npix_z;
++    surf_drm->blk_w = surf_ws->blk_w;
++    surf_drm->blk_h = surf_ws->blk_h;
++    surf_drm->blk_d = surf_ws->blk_d;
++    surf_drm->array_size = surf_ws->array_size;
++    surf_drm->last_level = surf_ws->last_level;
++    surf_drm->bpe = surf_ws->bpe;
++    surf_drm->nsamples = surf_ws->nsamples;
++    surf_drm->flags = surf_ws->flags;
++
++    surf_drm->bo_size = surf_ws->bo_size;
++    surf_drm->bo_alignment = surf_ws->bo_alignment;
++
++    surf_drm->bankw = surf_ws->bankw;
++    surf_drm->bankh = surf_ws->bankh;
++    surf_drm->mtilea = surf_ws->mtilea;
++    surf_drm->tile_split = surf_ws->tile_split;
++    surf_drm->stencil_tile_split = surf_ws->stencil_tile_split;
++    surf_drm->stencil_offset = surf_ws->stencil_offset;
++
++    for (i = 0; i < RADEON_SURF_MAX_LEVEL; i++) { /* all slots are copied, independent of last_level */
++        surf_level_winsys_to_drm(&surf_drm->level[i], &surf_ws->level[i]);
++        surf_level_winsys_to_drm(&surf_drm->stencil_level[i],
++                                 &surf_ws->stencil_level[i]);
++
++        surf_drm->tiling_index[i] = surf_ws->tiling_index[i];
++        surf_drm->stencil_tiling_index[i] = surf_ws->stencil_tiling_index[i];
++    }
++}
++/* Inverse of surf_winsys_to_drm: overwrite *surf_ws with the contents of *surf_drm, field by field. */
++static void surf_drm_to_winsys(struct radeon_surf *surf_ws,
++                               const struct radeon_surface *surf_drm)
++{
++    int i;
++
++    memset(surf_ws, 0, sizeof(*surf_ws)); /* previous contents are discarded; unassigned fields become zero */
++
++    surf_ws->npix_x = surf_drm->npix_x;
++    surf_ws->npix_y = surf_drm->npix_y;
++    surf_ws->npix_z = surf_drm->npix_z;
++    surf_ws->blk_w = surf_drm->blk_w;
++    surf_ws->blk_h = surf_drm->blk_h;
++    surf_ws->blk_d = surf_drm->blk_d;
++    surf_ws->array_size = surf_drm->array_size;
++    surf_ws->last_level = surf_drm->last_level;
++    surf_ws->bpe = surf_drm->bpe;
++    surf_ws->nsamples = surf_drm->nsamples;
++    surf_ws->flags = surf_drm->flags;
++
++    surf_ws->bo_size = surf_drm->bo_size;
++    surf_ws->bo_alignment = surf_drm->bo_alignment;
++
++    surf_ws->bankw = surf_drm->bankw;
++    surf_ws->bankh = surf_drm->bankh;
++    surf_ws->mtilea = surf_drm->mtilea;
++    surf_ws->tile_split = surf_drm->tile_split;
++    surf_ws->stencil_tile_split = surf_drm->stencil_tile_split;
++    surf_ws->stencil_offset = surf_drm->stencil_offset;
++
++    for (i = 0; i < RADEON_SURF_MAX_LEVEL; i++) { /* all slots are copied, independent of last_level */
++        surf_level_drm_to_winsys(&surf_ws->level[i], &surf_drm->level[i]);
++        surf_level_drm_to_winsys(&surf_ws->stencil_level[i],
++                                 &surf_drm->stencil_level[i]);
++
++        surf_ws->tiling_index[i] = surf_drm->tiling_index[i];
++        surf_ws->stencil_tiling_index[i] = surf_drm->stencil_tiling_index[i];
++    }
++}
++/* surface_init hook: run radeon_surface_init() on a converted copy and write the result back to *surf_ws. */
++static int radeon_winsys_surface_init(struct radeon_winsys *rws,
++                                      struct radeon_surf *surf_ws)
++{
++    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
++    struct radeon_surface surf_drm;
++    int r;
++
++    surf_winsys_to_drm(&surf_drm, surf_ws);
++
++    r = radeon_surface_init(ws->surf_man, &surf_drm);
++    if (r)
++        return r; /* non-zero result is passed through; *surf_ws has not been modified */
++
++    surf_drm_to_winsys(surf_ws, &surf_drm);
++    return 0;
++}
++/* surface_best hook: run radeon_surface_best() on a converted copy and write the result back to *surf_ws. */
++static int radeon_winsys_surface_best(struct radeon_winsys *rws,
++                                      struct radeon_surf *surf_ws)
++{
++    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
++    struct radeon_surface surf_drm;
++    int r;
++
++    surf_winsys_to_drm(&surf_drm, surf_ws);
++
++    r = radeon_surface_best(ws->surf_man, &surf_drm);
++    if (r)
++        return r; /* non-zero result is passed through; *surf_ws has not been modified */
++
++    surf_drm_to_winsys(surf_ws, &surf_drm);
++    return 0;
++}
++/* Install the two surface callbacks (surface_init, surface_best) of this file into ws->base. */
++void radeon_surface_init_functions(struct radeon_drm_winsys *ws)
++{
++    ws->base.surface_init = radeon_winsys_surface_init;
++    ws->base.surface_best = radeon_winsys_surface_best;
++}
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+index 2b12f4d..12767bf 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+@@ -44,6 +44,7 @@
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <unistd.h>
++#include <radeon_surface.h>
+ 
+ #ifndef RADEON_INFO_ACTIVE_CU_COUNT
+ #define RADEON_INFO_ACTIVE_CU_COUNT 0x20
+@@ -507,22 +508,6 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
+     return FALSE;
+ }
+ 
+-static int radeon_drm_winsys_surface_init(struct radeon_winsys *rws,
+-                                          struct radeon_surface *surf)
+-{
+-    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
+-
+-    return radeon_surface_init(ws->surf_man, surf);
+-}
+-
+-static int radeon_drm_winsys_surface_best(struct radeon_winsys *rws,
+-                                          struct radeon_surface *surf)
+-{
+-    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
+-
+-    return radeon_surface_best(ws->surf_man, surf);
+-}
+-
+ static uint64_t radeon_query_value(struct radeon_winsys *rws,
+                                    enum radeon_value_id value)
+ {
+@@ -706,12 +691,11 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
+     ws->base.destroy = radeon_winsys_destroy;
+     ws->base.query_info = radeon_query_info;
+     ws->base.cs_request_feature = radeon_cs_request_feature;
+-    ws->base.surface_init = radeon_drm_winsys_surface_init;
+-    ws->base.surface_best = radeon_drm_winsys_surface_best;
+     ws->base.query_value = radeon_query_value;
+ 
+     radeon_bomgr_init_functions(ws);
+     radeon_drm_cs_init_functions(ws);
++    radeon_surface_init_functions(ws);
+ 
+     pipe_mutex_init(ws->hyperz_owner_mutex);
+     pipe_mutex_init(ws->cmask_owner_mutex);
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+index 70657be..3200f0d 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+@@ -103,5 +103,6 @@ radeon_drm_winsys(struct radeon_winsys *base)
+ }
+ 
+ void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs);
++void radeon_surface_init_functions(struct radeon_drm_winsys *ws);
+ 
+ #endif
+diff --git a/src/gallium/winsys/radeon/radeon_winsys.h b/src/gallium/winsys/radeon/radeon_winsys.h
+index a8cc60a..7fb7ac9 100644
+--- a/src/gallium/winsys/radeon/radeon_winsys.h
++++ b/src/gallium/winsys/radeon/radeon_winsys.h
+@@ -41,7 +41,6 @@
+  */
+ 
+ #include "pipebuffer/pb_buffer.h"
+-#include "radeon_surface.h"
+ 
+ #define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
+ 
+@@ -243,6 +242,80 @@ enum radeon_feature_id {
+     RADEON_FID_R300_CMASK_ACCESS,
+ };
+ 
++#define RADEON_SURF_MAX_LEVEL                   32
++/* TYPE and MODE are multi-bit fields given as MASK/SHIFT pairs; the other RADEON_SURF_* values below are single bits. */
++#define RADEON_SURF_TYPE_MASK                   0xFF
++#define RADEON_SURF_TYPE_SHIFT                  0
++#define     RADEON_SURF_TYPE_1D                     0
++#define     RADEON_SURF_TYPE_2D                     1
++#define     RADEON_SURF_TYPE_3D                     2
++#define     RADEON_SURF_TYPE_CUBEMAP                3
++#define     RADEON_SURF_TYPE_1D_ARRAY               4
++#define     RADEON_SURF_TYPE_2D_ARRAY               5
++#define RADEON_SURF_MODE_MASK                   0xFF
++#define RADEON_SURF_MODE_SHIFT                  8
++#define     RADEON_SURF_MODE_LINEAR                 0
++#define     RADEON_SURF_MODE_LINEAR_ALIGNED         1
++#define     RADEON_SURF_MODE_1D                     2
++#define     RADEON_SURF_MODE_2D                     3
++#define RADEON_SURF_SCANOUT                     (1 << 16)
++#define RADEON_SURF_ZBUFFER                     (1 << 17)
++#define RADEON_SURF_SBUFFER                     (1 << 18)
++#define RADEON_SURF_Z_OR_SBUFFER                (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
++#define RADEON_SURF_HAS_SBUFFER_MIPTREE         (1 << 19)
++#define RADEON_SURF_HAS_TILE_MODE_INDEX         (1 << 20)
++#define RADEON_SURF_FMASK                       (1 << 21)
++/* GET extracts `field` from v; SET shifts a field value v into position; CLR yields v with the field's bits zeroed. */
++#define RADEON_SURF_GET(v, field)   (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
++#define RADEON_SURF_SET(v, field)   (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
++#define RADEON_SURF_CLR(v, field)   ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
++/* Per-level surface data; radeon_drm_surface.c copies each field to/from the same-named radeon_surface_level field. */
++struct radeon_surf_level {
++    uint64_t                    offset;
++    uint64_t                    slice_size;
++    uint32_t                    npix_x;
++    uint32_t                    npix_y;
++    uint32_t                    npix_z;
++    uint32_t                    nblk_x;
++    uint32_t                    nblk_y;
++    uint32_t                    nblk_z;
++    uint32_t                    pitch_bytes;
++    uint32_t                    mode;
++};
++/* Surface description used by surface_init/surface_best; radeon_drm_surface.c converts it to/from struct radeon_surface. */
++struct radeon_surf {
++    /* These are inputs to the calculator. */
++    uint32_t                    npix_x;
++    uint32_t                    npix_y;
++    uint32_t                    npix_z;
++    uint32_t                    blk_w;
++    uint32_t                    blk_h;
++    uint32_t                    blk_d;
++    uint32_t                    array_size;
++    uint32_t                    last_level;
++    uint32_t                    bpe;
++    uint32_t                    nsamples;
++    uint32_t                    flags;
++
++    /* These are return values. Some of them can be set by the caller, but
++     * they will be treated as hints (e.g. bankw, bankh) and might be
++     * changed by the calculator.
++     */
++    uint64_t                    bo_size;
++    uint64_t                    bo_alignment;
++    /* This applies to EG and later. */
++    uint32_t                    bankw;
++    uint32_t                    bankh;
++    uint32_t                    mtilea;
++    uint32_t                    tile_split;
++    uint32_t                    stencil_tile_split;
++    uint64_t                    stencil_offset;
++    struct radeon_surf_level    level[RADEON_SURF_MAX_LEVEL];
++    struct radeon_surf_level    stencil_level[RADEON_SURF_MAX_LEVEL];
++    uint32_t                    tiling_index[RADEON_SURF_MAX_LEVEL];
++    uint32_t                    stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
++};
++
+ struct radeon_winsys {
+     /**
+      * The screen object this winsys was created for
+@@ -573,7 +646,7 @@ struct radeon_winsys {
+      * \param surf      Surface structure ptr
+      */
+     int (*surface_init)(struct radeon_winsys *ws,
+-                        struct radeon_surface *surf);
++                        struct radeon_surf *surf);
+ 
+     /**
+      * Find best values for a surface
+@@ -582,7 +655,7 @@ struct radeon_winsys {
+      * \param surf      Surface structure ptr
+      */
+     int (*surface_best)(struct radeon_winsys *ws,
+-                        struct radeon_surface *surf);
++                        struct radeon_surf *surf);
+ 
+     uint64_t (*query_value)(struct radeon_winsys *ws,
+                             enum radeon_value_id value);
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0008-winsys-amdgpu-add-a-new-winsys-for-the-new-kernel-dr.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0008-winsys-amdgpu-add-a-new-winsys-for-the-new-kernel-dr.patch
new file mode 100644
index 00000000..88914a74
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0008-winsys-amdgpu-add-a-new-winsys-for-the-new-kernel-dr.patch
@@ -0,0 +1,2396 @@
+From c0e94dfc8abc3ec25c0a6342f9872a9e71aa7864 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 22:43:23 +0200
+Subject: [PATCH 08/29] winsys/amdgpu: add a new winsys for the new kernel
+ driver
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ configure.ac                                      |   5 +
+ src/gallium/Makefile.am                           |   1 +
+ src/gallium/drivers/r300/Automake.inc             |   6 +-
+ src/gallium/drivers/r600/Automake.inc             |   6 +-
+ src/gallium/drivers/radeonsi/Automake.inc         |   6 +-
+ src/gallium/targets/pipe-loader/Makefile.am       |  12 +-
+ src/gallium/winsys/radeon/amdgpu/Android.mk       |  40 ++
+ src/gallium/winsys/radeon/amdgpu/Makefile.am      |  12 +
+ src/gallium/winsys/radeon/amdgpu/Makefile.sources |   8 +
+ src/gallium/winsys/radeon/amdgpu/amdgpu_bo.c      | 643 ++++++++++++++++++++++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_bo.h      |  75 +++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_cs.c      | 578 +++++++++++++++++++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_cs.h      | 149 +++++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_public.h  |  14 +
+ src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c  | 491 +++++++++++++++++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h  |  80 +++
+ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   8 +
+ src/gallium/winsys/radeon/radeon_winsys.h         |   4 +
+ 18 files changed, 2129 insertions(+), 9 deletions(-)
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/Android.mk
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/Makefile.am
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/Makefile.sources
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_bo.c
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_bo.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_cs.c
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_cs.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_public.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h
+
+diff --git a/configure.ac b/configure.ac
+index 095e23e..f22975f 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -68,6 +68,7 @@ AC_SUBST([OSMESA_VERSION])
+ dnl Versions for external dependencies
+ LIBDRM_REQUIRED=2.4.38
+ LIBDRM_RADEON_REQUIRED=2.4.56
++LIBDRM_AMDGPU_REQUIRED=2.4.60
+ LIBDRM_INTEL_REQUIRED=2.4.60
+ LIBDRM_NVVIEUX_REQUIRED=2.4.33
+ LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
+@@ -2091,6 +2092,7 @@ if test -n "$with_gallium_drivers"; then
+         xr300)
+             HAVE_GALLIUM_R300=yes
+             PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
++            PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
+             gallium_require_drm "Gallium R300"
+             gallium_require_drm_loader
+             gallium_require_llvm "Gallium R300"
+@@ -2098,6 +2100,7 @@ if test -n "$with_gallium_drivers"; then
+         xr600)
+             HAVE_GALLIUM_R600=yes
+             PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
++            PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
+             gallium_require_drm "Gallium R600"
+             gallium_require_drm_loader
+             if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then
+@@ -2114,6 +2117,7 @@ if test -n "$with_gallium_drivers"; then
+         xradeonsi)
+             HAVE_GALLIUM_RADEONSI=yes
+             PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
++            PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
+             gallium_require_drm "radeonsi"
+             gallium_require_drm_loader
+             radeon_llvm_check "radeonsi"
+@@ -2384,6 +2388,7 @@ AC_CONFIG_FILES([Makefile
+ 		src/gallium/winsys/intel/drm/Makefile
+ 		src/gallium/winsys/nouveau/drm/Makefile
+ 		src/gallium/winsys/radeon/drm/Makefile
++		src/gallium/winsys/radeon/amdgpu/Makefile
+ 		src/gallium/winsys/svga/drm/Makefile
+ 		src/gallium/winsys/sw/dri/Makefile
+ 		src/gallium/winsys/sw/kms-dri/Makefile
+diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
+index ede6e21..fa526d4 100644
+--- a/src/gallium/Makefile.am
++++ b/src/gallium/Makefile.am
+@@ -63,6 +63,7 @@ endif
+ ## the radeon winsys - linked in by r300, r600 and radeonsi
+ if NEED_RADEON_DRM_WINSYS
+ SUBDIRS += winsys/radeon/drm
++SUBDIRS += winsys/radeon/amdgpu
+ endif
+ 
+ ## swrast/softpipe
+diff --git a/src/gallium/drivers/r300/Automake.inc b/src/gallium/drivers/r300/Automake.inc
+index 9334973..cfcd61c 100644
+--- a/src/gallium/drivers/r300/Automake.inc
++++ b/src/gallium/drivers/r300/Automake.inc
+@@ -5,9 +5,11 @@ TARGET_CPPFLAGS += -DGALLIUM_R300
+ TARGET_LIB_DEPS += \
+ 	$(top_builddir)/src/gallium/drivers/r300/libr300.la \
+ 	$(RADEON_LIBS) \
+-	$(INTEL_LIBS)
++	$(LIBDRM_LIBS) \
++	$(AMDGPU_LIBS)
+ 
+ TARGET_RADEON_WINSYS = \
+-	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
++	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
++	$(top_builddir)/src/gallium/winsys/radeon/amdgpu/libamdgpuwinsys.la
+ 
+ endif
+diff --git a/src/gallium/drivers/r600/Automake.inc b/src/gallium/drivers/r600/Automake.inc
+index 914eea3..2bb34b0 100644
+--- a/src/gallium/drivers/r600/Automake.inc
++++ b/src/gallium/drivers/r600/Automake.inc
+@@ -5,10 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_R600
+ TARGET_LIB_DEPS += \
+ 	$(top_builddir)/src/gallium/drivers/r600/libr600.la \
+ 	$(RADEON_LIBS) \
+-	$(LIBDRM_LIBS)
++	$(LIBDRM_LIBS) \
++	$(AMDGPU_LIBS)
+ 
+ TARGET_RADEON_WINSYS = \
+-	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
++	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
++	$(top_builddir)/src/gallium/winsys/radeon/amdgpu/libamdgpuwinsys.la
+ 
+ TARGET_RADEON_COMMON = \
+ 	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
+diff --git a/src/gallium/drivers/radeonsi/Automake.inc b/src/gallium/drivers/radeonsi/Automake.inc
+index 8686fff..200a254 100644
+--- a/src/gallium/drivers/radeonsi/Automake.inc
++++ b/src/gallium/drivers/radeonsi/Automake.inc
+@@ -5,10 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_RADEONSI
+ TARGET_LIB_DEPS += \
+ 	$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
+ 	$(RADEON_LIBS) \
+-	$(LIBDRM_LIBS)
++	$(LIBDRM_LIBS) \
++	$(AMDGPU_LIBS)
+ 
+ TARGET_RADEON_WINSYS = \
+-	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
++	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
++	$(top_builddir)/src/gallium/winsys/radeon/amdgpu/libamdgpuwinsys.la
+ 
+ TARGET_RADEON_COMMON = \
+ 	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
+diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am
+index 967cdb7..3527090 100644
+--- a/src/gallium/targets/pipe-loader/Makefile.am
++++ b/src/gallium/targets/pipe-loader/Makefile.am
+@@ -124,9 +124,11 @@ nodist_EXTRA_pipe_r300_la_SOURCES = dummy.cpp
+ pipe_r300_la_LIBADD = \
+ 	$(PIPE_LIBS) \
+ 	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
++	$(top_builddir)/src/gallium/winsys/radeon/amdgpu/libamdgpuwinsys.la \
+ 	$(top_builddir)/src/gallium/drivers/r300/libr300.la \
+ 	$(LIBDRM_LIBS) \
+-	$(RADEON_LIBS)
++	$(RADEON_LIBS) \
++	$(AMDGPU_LIBS)
+ 
+ endif
+ 
+@@ -138,10 +140,12 @@ nodist_EXTRA_pipe_r600_la_SOURCES = dummy.cpp
+ pipe_r600_la_LIBADD = \
+ 	$(PIPE_LIBS) \
+ 	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
++	$(top_builddir)/src/gallium/winsys/radeon/amdgpu/libamdgpuwinsys.la \
+ 	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
+ 	$(top_builddir)/src/gallium/drivers/r600/libr600.la \
+ 	$(LIBDRM_LIBS) \
+-	$(RADEON_LIBS)
++	$(RADEON_LIBS) \
++	$(AMDGPU_LIBS)
+ 
+ endif
+ 
+@@ -153,10 +157,12 @@ nodist_EXTRA_pipe_radeonsi_la_SOURCES = dummy.cpp
+ pipe_radeonsi_la_LIBADD = \
+ 	$(PIPE_LIBS) \
+ 	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
++	$(top_builddir)/src/gallium/winsys/radeon/amdgpu/libamdgpuwinsys.la \
+ 	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
+ 	$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
+ 	$(LIBDRM_LIBS) \
+-	$(RADEON_LIBS)
++	$(RADEON_LIBS) \
++	$(AMDGPU_LIBS)
+ 
+ endif
+ 
+diff --git a/src/gallium/winsys/radeon/amdgpu/Android.mk b/src/gallium/winsys/radeon/amdgpu/Android.mk
+new file mode 100644
+index 0000000..a10312f
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/Android.mk
+@@ -0,0 +1,40 @@
++# Mesa 3-D graphics library
++#
++# Copyright (C) 2011 Chia-I Wu <olvaffe@gmail.com>
++# Copyright (C) 2011 LunarG Inc.
++#
++# Permission is hereby granted, free of charge, to any person obtaining a
++# copy of this software and associated documentation files (the "Software"),
++# to deal in the Software without restriction, including without limitation
++# the rights to use, copy, modify, merge, publish, distribute, sublicense,
++# and/or sell copies of the Software, and to permit persons to whom the
++# Software is furnished to do so, subject to the following conditions:
++#
++# The above copyright notice and this permission notice shall be included
++# in all copies or substantial portions of the Software.
++#
++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++# DEALINGS IN THE SOFTWARE.
++
++LOCAL_PATH := $(call my-dir)
++
++# get C_SOURCES
++include $(LOCAL_PATH)/Makefile.sources
++
++include $(CLEAR_VARS)
++
++LOCAL_SRC_FILES := $(C_SOURCES)
++
++LOCAL_C_INCLUDES := \
++	$(DRM_TOP) \
++	$(DRM_TOP)/include/drm
++
++LOCAL_MODULE := libmesa_winsys_amdgpu
++
++include $(GALLIUM_COMMON_MK)
++include $(BUILD_STATIC_LIBRARY)
+diff --git a/src/gallium/winsys/radeon/amdgpu/Makefile.am b/src/gallium/winsys/radeon/amdgpu/Makefile.am
+new file mode 100644
+index 0000000..80ecb75
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/Makefile.am
+@@ -0,0 +1,12 @@
++include Makefile.sources
++include $(top_srcdir)/src/gallium/Automake.inc
++
++AM_CFLAGS = \
++	$(GALLIUM_WINSYS_CFLAGS) \
++	$(AMDGPU_CFLAGS)
++
++AM_CXXFLAGS = $(AM_CFLAGS)
++
++noinst_LTLIBRARIES = libamdgpuwinsys.la
++
++libamdgpuwinsys_la_SOURCES = $(C_SOURCES)
+diff --git a/src/gallium/winsys/radeon/amdgpu/Makefile.sources b/src/gallium/winsys/radeon/amdgpu/Makefile.sources
+new file mode 100644
+index 0000000..0f55010
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/Makefile.sources
+@@ -0,0 +1,8 @@
++C_SOURCES := \
++	amdgpu_bo.c \
++	amdgpu_bo.h \
++	amdgpu_cs.c \
++	amdgpu_cs.h \
++	amdgpu_public.h \
++	amdgpu_winsys.c \
++	amdgpu_winsys.h
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_bo.c b/src/gallium/winsys/radeon/amdgpu/amdgpu_bo.c
+new file mode 100644
+index 0000000..de9548e
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_bo.c
+@@ -0,0 +1,643 @@
++/*
++ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++#include "amdgpu_cs.h"
++
++#include "os/os_time.h"
++#include "state_tracker/drm_driver.h"
++#include <amdgpu_drm.h>
++#include <xf86drm.h>
++#include <stdio.h>
++
++static const struct pb_vtbl amdgpu_winsys_bo_vtbl;
++
++static INLINE struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
++{
++   assert(bo->vtbl == &amdgpu_winsys_bo_vtbl);
++   return (struct amdgpu_winsys_bo *)bo;
++}
++
++struct amdgpu_bomgr {
++   struct pb_manager base;
++   struct amdgpu_winsys *rws;
++};
++
++static struct amdgpu_winsys *get_winsys(struct pb_manager *mgr)
++{
++   return ((struct amdgpu_bomgr*)mgr)->rws;
++}
++
++static struct amdgpu_winsys_bo *get_amdgpu_winsys_bo(struct pb_buffer *_buf)
++{
++   struct amdgpu_winsys_bo *bo = NULL; /* returned as-is if nothing matches */
++   /* Use _buf directly if it has our vtbl, otherwise try its base buffer. */
++   if (_buf->vtbl == &amdgpu_winsys_bo_vtbl) {
++      bo = amdgpu_winsys_bo(_buf);
++   } else {
++      struct pb_buffer *base_buf;
++      pb_size offset;
++      pb_get_base_buffer(_buf, &base_buf, &offset);
++      /* The offset is not needed; only the base buffer's vtbl is checked. */
++      if (base_buf->vtbl == &amdgpu_winsys_bo_vtbl)
++         bo = amdgpu_winsys_bo(base_buf);
++   }
++
++   return bo;
++}
++
++static void amdgpu_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage)
++{
++   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
++   struct radeon_winsys *ws = &bo->rws->base;
++   /* 'usage' is not consulted. Yield until num_active_ioctls reads zero. */
++   while (p_atomic_read(&bo->num_active_ioctls)) {
++      sched_yield();
++   }
++   /* Then wait on the buffer's fence, if it has one, without a timeout. */
++   if (bo->fence) {
++      ws->fence_wait(ws, bo->fence, PIPE_TIMEOUT_INFINITE);
++   }
++}
++
++static boolean amdgpu_bo_is_busy(struct pb_buffer *_buf,
++                                 enum radeon_bo_usage usage)
++{
++   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
++   struct radeon_winsys *ws = &bo->rws->base;
++   /* 'usage' is not consulted. A nonzero num_active_ioctls means busy. */
++   if (p_atomic_read(&bo->num_active_ioctls)) {
++      return TRUE;
++   }
++   /* Otherwise busy iff a fence exists and a zero-timeout wait returns 0. */
++   return bo->fence && !ws->fence_wait(ws, bo->fence, 0);
++}
++
++static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
++      struct radeon_winsys_cs_handle *buf)
++{
++   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
++}
++
++static void amdgpu_bo_destroy(struct pb_buffer *_buf)
++{
++   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
++
++   amdgpu_bo_free(bo->bo);
++   amdgpu_fence_reference(&bo->fence, NULL);
++
++   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
++      bo->rws->allocated_vram -= align(bo->base.size, 4096);
++   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
++      bo->rws->allocated_gtt -= align(bo->base.size, 4096);
++   FREE(bo);
++}
++
++static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
++                           struct radeon_winsys_cs *rcs,
++                           enum pipe_transfer_usage usage)
++{
++   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
++   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
++   int r;
++   void *cpu = NULL;
++   /* Returns the CPU pointer, or NULL if DONTBLOCK hits a busy buffer. */
++   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
++   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
++      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
++      if (usage & PIPE_TRANSFER_DONTBLOCK) {
++         if (!(usage & PIPE_TRANSFER_WRITE)) {
++            /* Mapping for read.
++             *
++             * Since we are mapping for read, we don't need to wait
++             * if the GPU is using the buffer for read too
++             * (neither one is changing it).
++             *
++             * Only check whether the buffer is being used for write. */
++            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
++                                                               RADEON_USAGE_WRITE)) {
++               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
++               return NULL;
++            }
++
++            if (amdgpu_bo_is_busy((struct pb_buffer*)bo,
++                                  RADEON_USAGE_WRITE)) {
++               return NULL;
++            }
++         } else {
++            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
++               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
++               return NULL;
++            }
++
++            if (amdgpu_bo_is_busy((struct pb_buffer*)bo,
++                                  RADEON_USAGE_READWRITE)) {
++               return NULL;
++            }
++         }
++      } else {
++         uint64_t time = os_time_get_nano();
++
++         if (!(usage & PIPE_TRANSFER_WRITE)) {
++            /* Mapping for read.
++             *
++             * Since we are mapping for read, we don't need to wait
++             * if the GPU is using the buffer for read too
++             * (neither one is changing it).
++             *
++             * Only check whether the buffer is being used for write. */
++            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
++                                                               RADEON_USAGE_WRITE)) {
++               cs->flush_cs(cs->flush_data, 0, NULL);
++            }
++            amdgpu_bo_wait((struct pb_buffer*)bo,
++                           RADEON_USAGE_WRITE);
++         } else {
++            /* Mapping for write. */
++            if (cs) {
++               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
++                  cs->flush_cs(cs->flush_data, 0, NULL);
++               } else {
++                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
++                  if (p_atomic_read(&bo->num_active_ioctls))
++                     amdgpu_cs_sync_flush(rcs);
++               }
++            }
++
++            amdgpu_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
++         }
++         /* Add the time spent flushing and waiting above to the counter. */
++         bo->rws->buffer_wait_time += os_time_get_nano() - time;
++      }
++   }
++   /* A nonzero result from amdgpu_bo_cpu_map() is reported as NULL. */
++   r = amdgpu_bo_cpu_map(bo->bo, &cpu);
++   return r ? NULL : cpu;
++}
++
++static void amdgpu_bo_unmap(struct radeon_winsys_cs_handle *buf)
++{
++   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
++
++   amdgpu_bo_cpu_unmap(bo->bo);
++}
++
++static void amdgpu_bo_get_base_buffer(struct pb_buffer *buf,
++                                      struct pb_buffer **base_buf,
++                                      unsigned *offset)
++{
++   *base_buf = buf;
++   *offset = 0;
++}
++
++static enum pipe_error amdgpu_bo_validate(struct pb_buffer *_buf,
++                                          struct pb_validate *vl,
++                                          unsigned flags)
++{
++   /* Always pinned */
++   return PIPE_OK;
++}
++
++static void amdgpu_bo_fence(struct pb_buffer *buf,
++                            struct pipe_fence_handle *fence)
++{
++}
++
++static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
++   amdgpu_bo_destroy,
++   NULL, /* never called */
++   NULL, /* never called */
++   amdgpu_bo_validate,
++   amdgpu_bo_fence,
++   amdgpu_bo_get_base_buffer,
++};
++
++static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
++                                                pb_size size,
++                                                const struct pb_desc *desc)
++{
++   struct amdgpu_winsys *rws = get_winsys(_mgr);
++   struct amdgpu_bo_desc *rdesc = (struct amdgpu_bo_desc*)desc;
++   struct amdgpu_bo_alloc_request request = {0};
++   struct amdgpu_bo_alloc_result result = {0};
++   struct amdgpu_winsys_bo *bo;
++   int r;
++
++   assert(rdesc->initial_domain & RADEON_DOMAIN_VRAM_GTT);
++
++   request.alloc_size = size;
++   request.phys_alignment = desc->alignment;
++   if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) {
++      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
++      if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
++         request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
++   }
++   if (rdesc->initial_domain & RADEON_DOMAIN_GTT) {
++      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
++      if (rdesc->flags & RADEON_FLAG_GTT_WC)
++         request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_WC;
++   }
++
++   r = amdgpu_bo_alloc(rws->dev, &request, &result);
++   if (r) {
++      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
++      fprintf(stderr, "amdgpu:    size      : %u bytes\n", (unsigned)size);
++      fprintf(stderr, "amdgpu:    alignment : %u bytes\n", desc->alignment);
++      fprintf(stderr, "amdgpu:    domains   : %d\n", rdesc->initial_domain);
++      return NULL;
++   }
++
++   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
++   if (!bo) {
++      amdgpu_bo_free(result.buf_handle); /* don't leak the new buffer */
++      return NULL;
++   }
++   pipe_reference_init(&bo->base.reference, 1);
++   bo->base.alignment = desc->alignment;
++   bo->base.usage = desc->usage;
++   bo->base.size = size;
++   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
++   bo->rws = rws;
++   bo->bo = result.buf_handle;
++   bo->va = result.virtual_mc_base_address;
++   bo->initial_domain = rdesc->initial_domain;
++   /* Store the KMS handle in bo->handle; undo everything if export fails. */
++   if (amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->handle)) {
++      amdgpu_bo_free(bo->bo);
++      FREE(bo);
++      return NULL;
++   }
++   /* Count the 4096-aligned size against VRAM if requested, else GTT. */
++   if (rdesc->initial_domain & RADEON_DOMAIN_VRAM)
++      rws->allocated_vram += align(size, 4096);
++   else if (rdesc->initial_domain & RADEON_DOMAIN_GTT)
++      rws->allocated_gtt += align(size, 4096);
++
++   return &bo->base;
++}
++
++static void amdgpu_bomgr_flush(struct pb_manager *mgr)
++{
++   /* NOP */
++}
++
++/* This is for the cache bufmgr. */
++static boolean amdgpu_bomgr_is_buffer_busy(struct pb_manager *_mgr,
++                                           struct pb_buffer *_buf)
++{
++   struct amdgpu_winsys_bo *wbo = amdgpu_winsys_bo(_buf);
++   boolean busy = FALSE;
++
++   /* A buffer counts as busy when some command stream still references it,
++    * or when amdgpu_bo_is_busy() reports it busy for read/write usage.
++    * The second check only runs if the first one did not already match.
++    */
++   if (amdgpu_bo_is_referenced_by_any_cs(wbo) ||
++       amdgpu_bo_is_busy((struct pb_buffer*)wbo, RADEON_USAGE_READWRITE))
++      busy = TRUE;
++   return busy;
++}
++
++static void amdgpu_bomgr_destroy(struct pb_manager *mgr)
++{
++   FREE(mgr);
++}
++
++struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws)
++{
++   struct amdgpu_bomgr *mgr;
++
++   mgr = CALLOC_STRUCT(amdgpu_bomgr);
++   if (!mgr)
++      return NULL;
++
++   mgr->base.destroy = amdgpu_bomgr_destroy;
++   mgr->base.create_buffer = amdgpu_bomgr_create_bo;
++   mgr->base.flush = amdgpu_bomgr_flush;
++   mgr->base.is_buffer_busy = amdgpu_bomgr_is_buffer_busy;
++
++   mgr->rws = rws;
++   return &mgr->base;
++}
++
++static unsigned eg_tile_split(unsigned tile_split)
++{
++   /* Value selected by each field encoding 0..6 (64 << encoding). */
++   static const unsigned decoded[] = {
++      64, 128, 256, 512, 1024, 2048, 4096
++   };
++   const unsigned count = sizeof(decoded) / sizeof(decoded[0]);
++
++   /* Encodings outside the table decode like encoding 4, i.e. to 1024. */
++   if (tile_split >= count)
++      return 1024;
++
++   return decoded[tile_split];
++}
++
++static unsigned eg_tile_split_rev(unsigned eg_tile_split)
++{
++   unsigned code;
++
++   /* The recognised values are 64 << code for code 0..6. */
++   for (code = 0; code <= 6; code++) {
++      if (eg_tile_split == (64u << code))
++         return code;
++   }
++
++   /* Every other value gets the same encoding as 1024. */
++   return 4;
++}
++
++static void amdgpu_bo_get_tiling(struct pb_buffer *_buf,
++                                 enum radeon_bo_layout *microtiled,
++                                 enum radeon_bo_layout *macrotiled,
++                                 unsigned *bankw, unsigned *bankh,
++                                 unsigned *tile_split,
++                                 unsigned *stencil_tile_split,
++                                 unsigned *mtilea,
++                                 bool *scanout)
++{
++   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
++   struct amdgpu_bo_info info = {0};
++   uint32_t tiling_flags;
++
++   /* Default to linear so the outputs are defined even if the query fails. */
++   *microtiled = RADEON_LAYOUT_LINEAR;
++   *macrotiled = RADEON_LAYOUT_LINEAR;
++
++   if (amdgpu_bo_query_info(bo->bo, &info))
++      return;
++
++   tiling_flags = info.metadata.tiling_info;
++   if (tiling_flags & AMDGPU_TILING_MICRO)
++      *microtiled = RADEON_LAYOUT_TILED;
++   else if (tiling_flags & AMDGPU_TILING_MICRO_SQUARE)
++      *microtiled = RADEON_LAYOUT_SQUARETILED;
++
++   if (tiling_flags & AMDGPU_TILING_MACRO)
++      *macrotiled = RADEON_LAYOUT_TILED;
++   /* The EG outputs are optional, but only written when all five are given. */
++   if (bankw && bankh && tile_split && stencil_tile_split && mtilea) {
++      *bankw = (tiling_flags >> AMDGPU_TILING_EG_BANKW_SHIFT) & AMDGPU_TILING_EG_BANKW_MASK;
++      *bankh = (tiling_flags >> AMDGPU_TILING_EG_BANKH_SHIFT) & AMDGPU_TILING_EG_BANKH_MASK;
++      *tile_split = (tiling_flags >> AMDGPU_TILING_EG_TILE_SPLIT_SHIFT) & AMDGPU_TILING_EG_TILE_SPLIT_MASK;
++      *stencil_tile_split = (tiling_flags >> AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_MASK;
++      *mtilea = (tiling_flags >> AMDGPU_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & AMDGPU_TILING_EG_MACRO_TILE_ASPECT_MASK;
++      *tile_split = eg_tile_split(*tile_split);
++   }
++   if (scanout)
++      *scanout = !(tiling_flags & AMDGPU_TILING_R600_NO_SCANOUT);
++}
++
++static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
++                                 struct radeon_winsys_cs *rcs,
++                                 enum radeon_bo_layout microtiled,
++                                 enum radeon_bo_layout macrotiled,
++                                 unsigned bankw, unsigned bankh,
++                                 unsigned tile_split,
++                                 unsigned stencil_tile_split,
++                                 unsigned mtilea,
++                                 uint32_t pitch, /* not used in this function */
++                                 bool scanout)
++{
++   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++   struct amdgpu_bo_metadata metadata = {0};
++   uint32_t tiling_flags = 0;
++
++   /* Tiling determines how DRM treats the buffer data.
++    * We must flush CS when changing it if the buffer is referenced
++    * by the command stream passed in as rcs. */
++   if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
++      cs->flush_cs(cs->flush_data, 0, NULL);
++   }
++   /* Yield until num_active_ioctls reads zero before building the flags. */
++   while (p_atomic_read(&bo->num_active_ioctls)) {
++      sched_yield();
++   }
++
++   if (microtiled == RADEON_LAYOUT_TILED)
++      tiling_flags |= AMDGPU_TILING_MICRO;
++   else if (microtiled == RADEON_LAYOUT_SQUARETILED)
++      tiling_flags |= AMDGPU_TILING_MICRO_SQUARE;
++
++   if (macrotiled == RADEON_LAYOUT_TILED)
++      tiling_flags |= AMDGPU_TILING_MACRO;
++
++   tiling_flags |= (bankw & AMDGPU_TILING_EG_BANKW_MASK) <<
++                   AMDGPU_TILING_EG_BANKW_SHIFT;
++   tiling_flags |= (bankh & AMDGPU_TILING_EG_BANKH_MASK) <<
++                   AMDGPU_TILING_EG_BANKH_SHIFT;
++   if (tile_split) {
++      tiling_flags |= (eg_tile_split_rev(tile_split) &
++                       AMDGPU_TILING_EG_TILE_SPLIT_MASK) <<
++                      AMDGPU_TILING_EG_TILE_SPLIT_SHIFT;
++   }
++   tiling_flags |= (stencil_tile_split &
++                    AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_MASK) <<
++                   AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_SHIFT;
++   tiling_flags |= (mtilea & AMDGPU_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
++                   AMDGPU_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
++
++   if (!scanout)
++      tiling_flags |= AMDGPU_TILING_R600_NO_SCANOUT;
++
++   metadata.tiling_info = tiling_flags;
++   /* NOTE: the result of amdgpu_bo_set_metadata() is not checked here. */
++   amdgpu_bo_set_metadata(bo->bo, &metadata);
++}
++
++static struct radeon_winsys_cs_handle *amdgpu_get_cs_handle(struct pb_buffer *_buf)
++{
++   /* return a direct pointer to amdgpu_winsys_bo. */
++   return (struct radeon_winsys_cs_handle*)get_amdgpu_winsys_bo(_buf);
++}
++
++static struct pb_buffer *
++amdgpu_bo_create(struct radeon_winsys *rws,
++                 unsigned size,
++                 unsigned alignment,
++                 boolean use_reusable_pool,
++                 enum radeon_bo_domain domain,
++                 enum radeon_bo_flag flags)
++{
++   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
++   struct pb_manager *mgr;
++   struct amdgpu_bo_desc desc;
++
++   /* Build the descriptor from a fully zeroed structure. */
++   memset(&desc, 0, sizeof(desc));
++   desc.base.alignment = alignment;
++   desc.initial_domain = domain;
++   desc.flags = flags;
++
++   /* The usage word must carry only one bit each for the domain and for
++    * the flags; otherwise the cache manager might consider different
++    * sets of domains / flags compatible.
++    *
++    * Domain part: VRAM_GTT selects bit 2, any other domain is used as
++    * (domain >> 1).
++    * Flags part:  bit number (flags + 3).
++    */
++   desc.base.usage = (domain == RADEON_DOMAIN_VRAM_GTT) ? 1 << 2
++                                                         : domain >> 1;
++   assert(flags < sizeof(desc.base.usage) * 8 - 3);
++   desc.base.usage |= 1 << (flags + 3);
++
++   /* Pick the buffer manager: ws->cman for reusable buffers, ws->kman
++    * for everything else.
++    */
++   mgr = use_reusable_pool ? ws->cman : ws->kman;
++
++   /* create_buffer's result is returned unchanged, so a NULL from the
++    * manager is a NULL for the caller as well.
++    */
++   return mgr->create_buffer(mgr, size, &desc.base);
++}
++
++static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
++                                               struct winsys_handle *whandle,
++                                               unsigned *stride)
++{
++   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
++   struct amdgpu_winsys_bo *bo;
++   enum amdgpu_bo_handle_type type;
++   struct amdgpu_bo_import_result result = {0};
++   struct amdgpu_bo_info info = {0};
++   enum radeon_bo_domain initial = 0;
++   int r;
++
++   switch (whandle->type) {
++   case DRM_API_HANDLE_TYPE_SHARED:
++      type = amdgpu_bo_handle_type_gem_flink_name;
++      break;
++   case DRM_API_HANDLE_TYPE_FD:
++      type = amdgpu_bo_handle_type_dma_buf_fd;
++      break;
++   default:
++      return NULL;
++   }
++
++   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
++   if (r)
++      return NULL;
++
++   /* Get initial domains. */
++   r = amdgpu_bo_query_info(result.buf_handle, &info);
++   if (r) {
++      amdgpu_bo_free(result.buf_handle);
++      return NULL;
++   }
++
++   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
++      initial |= RADEON_DOMAIN_VRAM;
++   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
++      initial |= RADEON_DOMAIN_GTT;
++
++   /* Initialize the structure. */
++   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
++   if (!bo) {
++      amdgpu_bo_free(result.buf_handle);
++      return NULL;
++   }
++
++   pipe_reference_init(&bo->base.reference, 1);
++   bo->base.alignment = info.phys_alignment;
++   bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
++   bo->bo = result.buf_handle;
++   bo->base.size = result.alloc_size;
++   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
++   bo->rws = ws;
++   bo->va = result.virtual_mc_base_address;
++   bo->initial_domain = initial;
++
++   if (amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->handle)) {
++      amdgpu_bo_free(bo->bo);
++      FREE(bo);
++      return NULL;
++   }
++
++   if (stride)
++      *stride = whandle->stride;
++
++   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
++      ws->allocated_vram += align(bo->base.size, 4096);
++   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
++      ws->allocated_gtt += align(bo->base.size, 4096);
++
++   return &bo->base;
++}
++/* Export the BO as a flink name, dma-buf fd or KMS handle; TRUE on success. */
++static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
++                                    unsigned stride,
++                                    struct winsys_handle *whandle)
++{
++   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(buffer);
++   enum amdgpu_bo_handle_type type;
++   int r;
++
++   switch (whandle->type) { /* map the winsys handle type to the libdrm one */
++   case DRM_API_HANDLE_TYPE_SHARED:
++      type = amdgpu_bo_handle_type_gem_flink_name;
++      break;
++   case DRM_API_HANDLE_TYPE_FD:
++      type = amdgpu_bo_handle_type_dma_buf_fd;
++      break;
++   case DRM_API_HANDLE_TYPE_KMS:
++      type = amdgpu_bo_handle_type_kms;
++      break;
++   default:
++      return FALSE; /* unknown handle type */
++   }
++
++   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
++   if (r)
++      return FALSE;
++
++   whandle->stride = stride; /* passed through unchanged */
++   return TRUE;
++}
++/* Return the va field of the amdgpu_winsys_bo behind the CS handle. */
++static uint64_t amdgpu_bo_get_va(struct radeon_winsys_cs_handle *buf)
++{
++   return ((struct amdgpu_winsys_bo*)buf)->va;
++}
++/* Install the amdgpu buffer callbacks into the function table in ws->base. */
++void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws)
++{
++   ws->base.buffer_get_cs_handle = amdgpu_get_cs_handle;
++   ws->base.buffer_set_tiling = amdgpu_bo_set_tiling;
++   ws->base.buffer_get_tiling = amdgpu_bo_get_tiling;
++   ws->base.buffer_map = amdgpu_bo_map;
++   ws->base.buffer_unmap = amdgpu_bo_unmap;
++   ws->base.buffer_wait = amdgpu_bo_wait;
++   ws->base.buffer_is_busy = amdgpu_bo_is_busy;
++   ws->base.buffer_create = amdgpu_bo_create;
++   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
++   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
++   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
++   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
++}
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_bo.h b/src/gallium/winsys/radeon/amdgpu/amdgpu_bo.h
+new file mode 100644
+index 0000000..ccf98b5
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_bo.h
+@@ -0,0 +1,75 @@
++/*
++ * Copyright © 2008 Jérôme Glisse
++ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ *      Marek Olšák <maraeo@gmail.com>
++ */
++#ifndef AMDGPU_DRM_BO_H
++#define AMDGPU_DRM_BO_H
++
++#include "amdgpu_winsys.h"
++#include "pipebuffer/pb_bufmgr.h"
++/* Buffer descriptor extending pb_desc with an initial domain and flags. */
++struct amdgpu_bo_desc {
++   struct pb_desc base;
++
++   enum radeon_bo_domain initial_domain;
++   unsigned flags;
++};
++/* Winsys buffer object: a pb_buffer backed by a libdrm amdgpu BO. */
++struct amdgpu_winsys_bo {
++   struct pb_buffer base; /* first member: the struct is cast to/from pb_buffer */
++
++   struct amdgpu_winsys *rws; /* winsys the buffer belongs to */
++
++   amdgpu_bo_handle bo;   /* libdrm buffer handle */
++   uint32_t handle;       /* KMS handle; also the key for the CS reloc hash list */
++   uint64_t va;           /* value returned by buffer_get_virtual_address */
++   enum radeon_bo_domain initial_domain;
++
++   /* how many command streams is this bo referenced in? */
++   int num_cs_references;
++
++   /* how many command streams, which are being emitted in a separate
++    * thread, is this bo referenced in? */
++   int num_active_ioctls;
++
++   struct pipe_fence_handle *fence; /* for buffer_wait & buffer_is_busy */
++};
++
++struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws);
++void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws);
++/* Reference-counted assignment *dst = src, forwarded to pb_reference(). */
++static INLINE
++void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
++                                struct amdgpu_winsys_bo *src)
++{
++   pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
++}
++
++#endif
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_cs.c b/src/gallium/winsys/radeon/amdgpu/amdgpu_cs.c
+new file mode 100644
+index 0000000..aee7ff3
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_cs.c
+@@ -0,0 +1,578 @@
++/*
++ * Copyright © 2008 Jérôme Glisse
++ * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Marek Olšák <maraeo@gmail.com>
++ */
++
++#include "amdgpu_cs.h"
++#include "os/os_time.h"
++#include <stdio.h>
++#include <amdgpu_drm.h>
++
++
++/* FENCES */
++/* Create a fence for IP block "ip" and ring "instance", flagged as in flight. */
++static struct pipe_fence_handle *
++amdgpu_fence_create(unsigned ip, uint32_t instance)
++{
++   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
++   /* NOTE(review): the allocation result is dereferenced without a NULL check. */
++   fence->reference.count = 1; /* the returned pointer is the only reference */
++   fence->ip_type = ip;
++   fence->ring = instance;
++   fence->submission_in_progress = true; /* cleared by amdgpu_fence_submitted() */
++   return (struct pipe_fence_handle *)fence;
++}
++/* Record the fence ID of a finished submission and clear the in-flight flag. */
++static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
++                                   uint64_t fence_id)
++{
++   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
++
++   rfence->fence = fence_id;
++   rfence->submission_in_progress = false; /* waiters in amdgpu_fence_wait() poll this */
++}
++/* Mark the fence as signalled; used when a submission has been rejected. */
++static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
++{
++   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
++
++   rfence->signalled = true; /* amdgpu_fence_wait() returns early on this flag */
++}
++/* Wait up to "timeout" nanoseconds for the fence; true once it has signalled. */
++static bool amdgpu_fence_wait(struct radeon_winsys *rws,
++                              struct pipe_fence_handle *fence,
++                              uint64_t timeout)
++{
++   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
++   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
++   struct amdgpu_cs_query_fence query = {0};
++   uint32_t expired;
++   int r;
++
++   /* XXX Access to rfence->signalled is racy here. */
++   if (rfence->signalled)
++      return true;
++
++   /* The fence may not have a number assigned if its IB is being
++    * submitted in the other thread right now. Wait until the submission
++    * is done. */
++   if (rfence->submission_in_progress) {
++      if (!timeout) {
++         return FALSE; /* zero timeout: do not wait at all */
++      } else if (timeout == PIPE_TIMEOUT_INFINITE) {
++         while (rfence->submission_in_progress)
++            sched_yield();
++      } else {
++         int64_t start_time = os_time_get_nano();
++         int64_t elapsed_time = 0;
++
++         while (rfence->submission_in_progress) {
++            elapsed_time = os_time_get_nano() - start_time;
++            if (elapsed_time >= timeout) {
++               return FALSE; /* timed out before the fence got its ID */
++            }
++            sched_yield();
++         }
++         timeout -= elapsed_time; /* only the remainder is given to the query */
++      }
++   }
++
++   /* Now use the libdrm query. */
++   query.timeout_ns = timeout;
++   query.fence = rfence->fence;
++   query.context = ws->ctx;
++   query.ip_type = rfence->ip_type;
++   query.ip_instance = 0;
++   query.ring = rfence->ring;
++
++   r = amdgpu_cs_query_fence_status(ws->dev, &query, &expired);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
++      return FALSE;
++   }
++
++   rfence->signalled = expired != 0; /* cache the result for later calls */
++   return rfence->signalled;
++}
++
++/* COMMAND SUBMISSION */
++/* Allocate a 64K IB and make it the write target of the CS; false on failure. */
++static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
++{
++   struct amdgpu_cs_context *cur_cs = cs->csc;
++   struct amdgpu_winsys *ws = cs->ws;
++   struct amdgpu_cs_ib_alloc_result ib;
++   int r;
++
++   r = amdgpu_cs_alloc_ib(ws->dev, ws->ctx, amdgpu_cs_ib_size_64K, &ib);
++   if (r)
++      return false; /* cs->base.buf and cdw are left untouched */
++
++   cs->base.buf = ib.cpu;
++   cs->base.cdw = 0; /* start writing at the beginning of the new IB */
++
++   cur_cs->ib.ib_handle = ib.handle; /* remembered in the current context */
++   return true;
++}
++/* Set up a CS context with room for 512 buffers; FALSE if an allocation fails. */
++static boolean amdgpu_init_cs_context(struct amdgpu_cs_context *csc)
++{
++   int i;
++
++   csc->request.number_of_ibs = 1; /* every request carries exactly csc->ib */
++   csc->request.ibs = &csc->ib;
++
++   csc->max_num_buffers = 512;
++   csc->buffers = (struct amdgpu_cs_buffer*)
++                  CALLOC(1, csc->max_num_buffers * sizeof(struct amdgpu_cs_buffer));
++   if (!csc->buffers) {
++      return FALSE;
++   }
++
++   csc->handles = CALLOC(1, csc->max_num_buffers * sizeof(amdgpu_bo_handle));
++   if (!csc->handles) {
++      FREE(csc->buffers);
++      return FALSE;
++   }
++
++   csc->flags = CALLOC(1, csc->max_num_buffers); /* one byte per buffer */
++   if (!csc->flags) {
++      FREE(csc->handles);
++      FREE(csc->buffers);
++      return FALSE;
++   }
++
++   for (i = 0; i < Elements(csc->buffer_indices_hashlist); i++) {
++      csc->buffer_indices_hashlist[i] = -1; /* -1 marks an empty hash slot */
++   }
++   return TRUE;
++}
++/* Drop all buffer references and the fence; reset the context for reuse. */
++static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *csc)
++{
++   unsigned i;
++
++   for (i = 0; i < csc->num_buffers; i++) {
++      p_atomic_dec(&csc->buffers[i].bo->num_cs_references);
++      amdgpu_winsys_bo_reference(&csc->buffers[i].bo, NULL);
++      csc->handles[i] = NULL;
++      csc->flags[i] = 0;
++   }
++
++   csc->num_buffers = 0; /* makes a second call skip the loop above */
++   csc->used_gart = 0;
++   csc->used_vram = 0;
++   amdgpu_fence_reference(&csc->fence, NULL);
++
++   for (i = 0; i < Elements(csc->buffer_indices_hashlist); i++) {
++      csc->buffer_indices_hashlist[i] = -1; /* -1 marks an empty hash slot */
++   }
++}
++/* Clean the context up and free its three per-buffer arrays. */
++static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *csc)
++{
++   amdgpu_cs_context_cleanup(csc); /* releases buffer and fence references first */
++   FREE(csc->flags);
++   FREE(csc->buffers);
++   FREE(csc->handles);
++}
++
++/* Create a command stream with two contexts and a first IB; NULL on failure. */
++static struct radeon_winsys_cs *
++amdgpu_cs_create(struct radeon_winsys *rws,
++                 enum ring_type ring_type,
++                 void (*flush)(void *ctx, unsigned flags,
++                               struct pipe_fence_handle **fence),
++                 void *flush_ctx,
++                 struct radeon_winsys_cs_handle *trace_buf)
++{
++   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
++   struct amdgpu_cs *cs;
++
++   cs = CALLOC_STRUCT(amdgpu_cs);
++   if (!cs) {
++      return NULL;
++   }
++
++   cs->ws = ws;
++   cs->flush_cs = flush; /* stored together with flush_ctx; trace_buf is unused */
++   cs->flush_data = flush_ctx;
++
++   if (!amdgpu_init_cs_context(&cs->csc1)) {
++      FREE(cs);
++      return NULL;
++   }
++   if (!amdgpu_init_cs_context(&cs->csc2)) {
++      amdgpu_destroy_cs_context(&cs->csc1);
++      FREE(cs);
++      return NULL;
++   }
++
++   /* Set the first command buffer as current. */
++   cs->csc = &cs->csc1;
++   cs->cst = &cs->csc2;
++   cs->base.ring_type = ring_type;
++
++   if (!amdgpu_get_new_ib(cs)) {
++      amdgpu_destroy_cs_context(&cs->csc2);
++      amdgpu_destroy_cs_context(&cs->csc1);
++      FREE(cs);
++      return NULL;
++   }
++
++   /* Initialized last: the failure paths above then have no semaphore to destroy. */
++   pipe_semaphore_init(&cs->flush_completed, 1);
++   p_atomic_inc(&ws->num_cs);
++   return &cs->base;
++}
++/* Append one dword at buf[cdw] and advance cdw; "cs" is evaluated twice. */
++#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
++/* Return the index of bo in csc->buffers, or -1 if it is not in the list. */
++int amdgpu_get_reloc(struct amdgpu_cs_context *csc, struct amdgpu_winsys_bo *bo)
++{
++   unsigned hash = bo->handle & (Elements(csc->buffer_indices_hashlist)-1);
++   int i = csc->buffer_indices_hashlist[hash];
++
++   /* not found or found */
++   if (i == -1 || csc->buffers[i].bo == bo)
++      return i;
++
++   /* Hash collision, look for the BO in the list of relocs linearly. */
++   for (i = csc->num_buffers - 1; i >= 0; i--) {
++      if (csc->buffers[i].bo == bo) {
++         /* Put this reloc in the hash list.
++          * This will prevent additional hash collisions if there are
++          * several consecutive get_reloc calls for the same buffer.
++          *
++          * Example: Assuming buffers A,B,C collide in the hash list,
++          * the following sequence of relocs:
++          *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
++          * will collide here: ^ and here:   ^,
++          * meaning that we should get very few collisions in the end. */
++         csc->buffer_indices_hashlist[hash] = i;
++         return i;
++      }
++   }
++   return -1; /* the slot belongs to another BO and bo is not in the list */
++}
++/* Add bo to the current context or merge into its entry; returns its index. */
++static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
++                                 struct amdgpu_winsys_bo *bo,
++                                 enum radeon_bo_usage usage,
++                                 enum radeon_bo_domain domains,
++                                 unsigned priority,
++                                 enum radeon_bo_domain *added_domains)
++{
++   struct amdgpu_cs_context *csc = cs->csc;
++   struct amdgpu_cs_buffer *reloc;
++   unsigned hash = bo->handle & (Elements(csc->buffer_indices_hashlist)-1);
++   int i = -1;
++
++   priority = MIN2(priority, 15); /* clamp before storing it in the flags byte */
++   *added_domains = 0;
++
++   i = amdgpu_get_reloc(csc, bo);
++
++   if (i >= 0) {
++      reloc = &csc->buffers[i]; /* already listed: merge usage, domains, priority */
++      reloc->usage |= usage;
++      *added_domains = domains & ~reloc->domains; /* only the domains that are new */
++      reloc->domains |= domains;
++      csc->flags[i] = MAX2(csc->flags[i], priority);
++      return i;
++   }
++
++   /* New relocation, check if the backing array is large enough. */
++   if (csc->num_buffers >= csc->max_num_buffers) {
++      uint32_t size;
++      csc->max_num_buffers += 10; /* grow all three arrays by 10 entries */
++      /* NOTE(review): the realloc() results below are stored without a check. */
++      size = csc->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
++      csc->buffers = realloc(csc->buffers, size);
++
++      size = csc->max_num_buffers * sizeof(amdgpu_bo_handle);
++      csc->handles = realloc(csc->handles, size);
++
++      csc->flags = realloc(csc->flags, csc->max_num_buffers);
++   }
++
++   /* Initialize the new relocation. */
++   csc->buffers[csc->num_buffers].bo = NULL; /* slot may hold stale data after realloc */
++   amdgpu_winsys_bo_reference(&csc->buffers[csc->num_buffers].bo, bo);
++   csc->handles[csc->num_buffers] = bo->bo;
++   csc->flags[csc->num_buffers] = priority;
++   p_atomic_inc(&bo->num_cs_references);
++   reloc = &csc->buffers[csc->num_buffers];
++   reloc->bo = bo; /* same pointer the reference call above already stored */
++   reloc->usage = usage;
++   reloc->domains = domains;
++
++   csc->buffer_indices_hashlist[hash] = csc->num_buffers; /* newest entry owns the slot */
++
++   *added_domains = domains;
++   return csc->num_buffers++;
++}
++/* cs_add_reloc callback: add the buffer and account its size per new domain. */
++static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
++                                    struct radeon_winsys_cs_handle *buf,
++                                    enum radeon_bo_usage usage,
++                                    enum radeon_bo_domain domains,
++                                    enum radeon_bo_priority priority)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
++   enum radeon_bo_domain added_domains;
++   unsigned index = amdgpu_add_reloc(cs, bo, usage, domains, priority, &added_domains);
++
++   if (added_domains & RADEON_DOMAIN_GTT) /* counted once per buffer and domain */
++      cs->csc->used_gart += bo->base.size;
++   if (added_domains & RADEON_DOMAIN_VRAM)
++      cs->csc->used_vram += bo->base.size;
++
++   return index;
++}
++/* cs_get_reloc callback: index of buf in the current context, or -1. */
++static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
++                               struct radeon_winsys_cs_handle *buf)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++
++   return amdgpu_get_reloc(cs->csc, (struct amdgpu_winsys_bo*)buf);
++}
++/* cs_validate callback: performs no check and always reports success. */
++static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
++{
++   return TRUE;
++}
++/* TRUE if CS usage plus vram/gtt stays below 70% of both heap sizes. */
++static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++   boolean status =
++         (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
++         (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
++
++   return status;
++}
++/* Submit csc, update its fence and the buffers' fences, then clean csc up. */
++void amdgpu_cs_emit_ioctl_oneshot(struct amdgpu_cs *cs, struct amdgpu_cs_context *csc)
++{
++   struct amdgpu_winsys *ws = cs->ws;
++   int i, r;
++   uint64_t fence;
++
++   r = amdgpu_cs_submit(ws->dev, ws->ctx, 0, &csc->request, 1, &fence);
++   if (r) {
++      fprintf(stderr, "amdgpu: The CS has been rejected, "
++              "see dmesg for more information.\n");
++
++      amdgpu_fence_signalled(csc->fence); /* fence waits on this CS return at once */
++   } else {
++      /* Success. */
++      amdgpu_fence_submitted(csc->fence, fence);
++
++      for (i = 0; i < csc->num_buffers; i++) { /* each buffer now tracks this fence */
++         amdgpu_fence_reference(&csc->buffers[i].bo->fence, csc->fence);
++      }
++   }
++
++   /* Cleanup. */
++   for (i = 0; i < csc->num_buffers; i++) { /* undoes the increment in amdgpu_cs_flush() */
++      p_atomic_dec(&csc->buffers[i].bo->num_active_ioctls);
++   }
++   amdgpu_cs_context_cleanup(csc);
++}
++
++/*
++ * Make sure previous submissions of this CS have completed.
++ */
++void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++
++   /* Wait for any pending ioctl to complete. */
++   if (cs->ws->thread) {
++      pipe_semaphore_wait(&cs->flush_completed); /* amdgpu_cs_flush() takes it before queuing */
++      pipe_semaphore_signal(&cs->flush_completed); /* give it straight back */
++   }
++}
++/* Debug option RADEON_NOOP (default FALSE): amdgpu_cs_flush() skips submission. */
++DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
++/* cs_flush callback: pad the IB, swap the contexts and submit the filled one. */
++static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
++                            unsigned flags,
++                            struct pipe_fence_handle **fence,
++                            uint32_t cs_trace_id)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++   struct amdgpu_cs_context *tmp;
++
++   switch (cs->base.ring_type) { /* pad with the NOP encoding of the ring */
++   case RING_DMA:
++      /* pad DMA ring to 8 DWs */
++      if (cs->ws->info.chip_class <= SI) {
++         while (rcs->cdw & 7)
++            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
++      } else {
++         while (rcs->cdw & 7)
++            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
++      }
++      break;
++   case RING_GFX:
++      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
++       * r6xx, requires at least 4 dw alignment to avoid a hw bug.
++       */
++      if (cs->ws->info.chip_class <= SI) {
++         while (rcs->cdw & 7)
++            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
++      } else {
++         while (rcs->cdw & 7)
++            OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
++      }
++      break;
++   case RING_UVD:
++      while (rcs->cdw & 15) /* UVD is padded to 16 DWs */
++         OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
++      break;
++   default:
++      break;
++   }
++
++   if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { /* only reported; the submit is skipped below */
++      fprintf(stderr, "amdgpu: command stream overflowed\n");
++   }
++
++   amdgpu_cs_sync_flush(rcs); /* the other context must be idle before the swap */
++
++   /* Swap command streams. */
++   tmp = cs->csc;
++   cs->csc = cs->cst;
++   cs->cst = tmp;
++
++   /* If the CS is not empty or overflowed, emit it in a separate thread. */
++   if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
++      unsigned i, num_buffers = cs->cst->num_buffers;
++
++      cs->cst->ib.size = cs->base.cdw; /* IB size in dwords, padding included */
++      cs->cst->request.number_of_resources = cs->cst->num_buffers;
++      cs->cst->request.resources = cs->cst->handles;
++      cs->cst->request.resource_flags = cs->cst->flags;
++
++      for (i = 0; i < num_buffers; i++) {
++         /* Update the number of active asynchronous CS ioctls for the buffer. */
++         p_atomic_inc(&cs->cst->buffers[i].bo->num_active_ioctls);
++      }
++
++      switch (cs->base.ring_type) { /* pick the hardware IP block for the ring */
++      case RING_DMA:
++         cs->cst->request.ip_type = AMDGPU_HW_IP_DMA;
++         break;
++
++      case RING_UVD:
++         cs->cst->request.ip_type = AMDGPU_HW_IP_UVD;
++         break;
++
++      case RING_VCE:
++         cs->cst->request.ip_type = AMDGPU_HW_IP_VCE;
++         break;
++
++      default:
++      case RING_GFX:
++         if (flags & RADEON_FLUSH_COMPUTE) {
++            cs->cst->request.ip_type = AMDGPU_HW_IP_COMPUTE;
++         } else {
++            cs->cst->request.ip_type = AMDGPU_HW_IP_GFX;
++         }
++         break;
++      }
++
++      amdgpu_fence_reference(&cs->cst->fence, NULL); /* drop the previous fence */
++      cs->cst->fence = amdgpu_fence_create(cs->cst->request.ip_type,
++                                           cs->cst->request.ring);
++
++      if (fence) /* the caller gets its own reference to the new fence */
++         amdgpu_fence_reference(fence, cs->cst->fence);
++
++      if (cs->ws->thread) {
++         pipe_semaphore_wait(&cs->flush_completed); /* presumably signalled after the queued submit; confirm */
++         amdgpu_ws_queue_cs(cs->ws, cs);
++         if (!(flags & RADEON_FLUSH_ASYNC))
++            amdgpu_cs_sync_flush(rcs);
++      } else {
++         amdgpu_cs_emit_ioctl_oneshot(cs, cs->cst); /* no thread: submit synchronously */
++      }
++   } else {
++      amdgpu_cs_context_cleanup(cs->cst); /* empty, overflowed or no-op: drop it */
++   }
++
++   amdgpu_get_new_ib(cs); /* NOTE(review): the return value is ignored here */
++
++   cs->ws->num_cs_flushes++;
++}
++/* cs_destroy callback: wait for pending work, then free the IB and the CS. */
++static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++
++   amdgpu_cs_sync_flush(rcs); /* no flush may be pending when the CS goes away */
++   pipe_semaphore_destroy(&cs->flush_completed);
++   amdgpu_cs_context_cleanup(&cs->csc1);
++   amdgpu_cs_context_cleanup(&cs->csc2);
++   p_atomic_dec(&cs->ws->num_cs);
++   amdgpu_cs_free_ib(cs->ws->dev, cs->ws->ctx,
++                     cs->csc->ib.ib_handle); /* the IB of the current context */
++   amdgpu_destroy_cs_context(&cs->csc1); /* runs the context cleanup once more */
++   amdgpu_destroy_cs_context(&cs->csc2);
++   FREE(cs);
++}
++/* cs_is_buffer_referenced callback: is _buf in this CS with the given usage? */
++static boolean amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
++                                       struct radeon_winsys_cs_handle *_buf,
++                                       enum radeon_bo_usage usage)
++{
++   struct amdgpu_cs *cs = amdgpu_cs(rcs);
++   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)_buf;
++
++   return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage);
++}
++/* Install the command-stream and fence callbacks into ws->base. */
++void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
++{
++   ws->base.cs_create = amdgpu_cs_create;
++   ws->base.cs_destroy = amdgpu_cs_destroy;
++   ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
++   ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
++   ws->base.cs_validate = amdgpu_cs_validate;
++   ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
++   ws->base.cs_flush = amdgpu_cs_flush;
++   ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
++   ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
++   ws->base.fence_wait = amdgpu_fence_wait;
++   ws->base.fence_reference = amdgpu_fence_reference;
++}
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_cs.h b/src/gallium/winsys/radeon/amdgpu/amdgpu_cs.h
+new file mode 100644
+index 0000000..36a9aad
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_cs.h
+@@ -0,0 +1,149 @@
++/*
++ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++#ifndef AMDGPU_DRM_CS_H
++#define AMDGPU_DRM_CS_H
++
++#include "amdgpu_bo.h"
++#include "util/u_memory.h"
++/* One buffer-list entry of a CS context. */
++struct amdgpu_cs_buffer {
++   struct amdgpu_winsys_bo *bo;   /* referenced buffer */
++   enum radeon_bo_usage usage;    /* OR of all usages requested for it */
++   enum radeon_bo_domain domains; /* OR of all domains requested for it */
++};
++/* State of one submission: the request, its IB and the referenced buffers. */
++struct amdgpu_cs_context {
++   struct amdgpu_cs_request    request; /* passed to amdgpu_cs_submit() */
++   struct amdgpu_cs_ib_info    ib;      /* the single IB of the request */
++
++   /* Relocs. */
++   unsigned                    max_num_buffers; /* allocated entries per array */
++   unsigned                    num_buffers;     /* used entries */
++   amdgpu_bo_handle            *handles;        /* libdrm handle per buffer */
++   uint8_t                     *flags;          /* priority per buffer (0-15) */
++   struct amdgpu_cs_buffer     *buffers;
++
++   int                         buffer_indices_hashlist[512]; /* index into buffers, -1 = empty */
++
++   unsigned                    used_vram; /* summed sizes of the buffers added for VRAM */
++   unsigned                    used_gart; /* summed sizes of the buffers added for GTT */
++
++   struct pipe_fence_handle    *fence; /* fence created when this context is flushed */
++};
++/* Command stream: a radeon_winsys_cs with two alternating contexts. */
++struct amdgpu_cs {
++   struct radeon_winsys_cs base; /* first member: the struct is cast from base */
++
++   /* We flip between these two CS. While one is being consumed
++    * by the kernel in another thread, the other one is being filled
++    * by the pipe driver. */
++   struct amdgpu_cs_context csc1;
++   struct amdgpu_cs_context csc2;
++   /* The currently-used CS. */
++   struct amdgpu_cs_context *csc;
++   /* The CS being currently-owned by the other thread. */
++   struct amdgpu_cs_context *cst;
++
++   /* The winsys. */
++   struct amdgpu_winsys *ws;
++
++   /* Flush CS. */
++   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
++   void *flush_data;
++
++   pipe_semaphore flush_completed; /* taken while a queued flush is pending */
++};
++/* Reference-counted fence of one submission. */
++struct amdgpu_fence {
++   struct pipe_reference reference;
++
++   uint64_t fence;          /* fence ID */
++   unsigned ip_type;        /* which hw ip block the fence belongs to */
++   uint32_t ring;           /* ring index of the hw ip block */
++
++   /* Set while the fence ID is still unknown because the IB is being
++    * submitted in the other thread. */
++   bool submission_in_progress;
++   bool signalled;          /* cached "done" state; see amdgpu_fence_wait() */
++};
++/* Reference-counted assignment *dst = src; src may be NULL to drop *dst. */
++static INLINE void amdgpu_fence_reference(struct pipe_fence_handle **dst,
++                                          struct pipe_fence_handle *src)
++{
++   struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst;
++   struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src;
++
++   if (pipe_reference(&(*rdst)->reference, &rsrc->reference))
++      FREE(*rdst); /* pipe_reference() reported the old fence as unreferenced */
++   *rdst = rsrc;
++}
++
++int amdgpu_get_reloc(struct amdgpu_cs_context *csc, struct amdgpu_winsys_bo *bo);
++/* Cast a radeon_winsys_cs pointer to the amdgpu_cs that embeds it. */
++static INLINE struct amdgpu_cs *
++amdgpu_cs(struct radeon_winsys_cs *base)
++{
++   return (struct amdgpu_cs*)base;
++}
++/* TRUE if bo is referenced by every CS or is in this CS's current context. */
++static INLINE boolean
++amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
++                              struct amdgpu_winsys_bo *bo)
++{
++   int num_refs = bo->num_cs_references; /* read once, used in both tests below */
++   return num_refs == bo->rws->num_cs ||
++         (num_refs && amdgpu_get_reloc(cs->csc, bo) != -1);
++}
++/* TRUE if bo is in this CS's current context with any of the usage bits. */
++static INLINE boolean
++amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
++                                         struct amdgpu_winsys_bo *bo,
++                                         enum radeon_bo_usage usage)
++{
++   int index;
++
++   if (!bo->num_cs_references) /* cheap exit: no CS references the buffer */
++      return FALSE;
++
++   index = amdgpu_get_reloc(cs->csc, bo);
++   if (index == -1)
++      return FALSE;
++
++   return (cs->csc->buffers[index].usage & usage) != 0;
++}
++/* TRUE if at least one CS context currently references bo. */
++static INLINE boolean
++amdgpu_bo_is_referenced_by_any_cs(struct amdgpu_winsys_bo *bo)
++{
++   return bo->num_cs_references != 0;
++}
++
++void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs);
++void amdgpu_cs_init_functions(struct amdgpu_winsys *ws);
++void amdgpu_cs_emit_ioctl_oneshot(struct amdgpu_cs *cs, struct amdgpu_cs_context *csc);
++
++#endif
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_public.h b/src/gallium/winsys/radeon/amdgpu/amdgpu_public.h
+new file mode 100644
+index 0000000..4a7aa8e
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_public.h
+@@ -0,0 +1,14 @@
++#ifndef AMDGPU_DRM_PUBLIC_H
++#define AMDGPU_DRM_PUBLIC_H
++
++#include "pipe/p_defines.h"
++
++struct radeon_winsys;
++struct pipe_screen;
++
++typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *);
++
++struct radeon_winsys *
++amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create);
++
++#endif
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c
+new file mode 100644
+index 0000000..0f3367a
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c
+@@ -0,0 +1,491 @@
++/*
++ * Copyright © 2009 Corbin Simpson
++ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Corbin Simpson <MostAwesomeDude@gmail.com>
++ *      Joakim Sindholt <opensource@zhasha.com>
++ *      Marek Olšák <maraeo@gmail.com>
++ */
++
++#include "amdgpu_cs.h"
++#include "amdgpu_public.h"
++
++#include "util/u_hash_table.h"
++#include <amdgpu_drm.h>
++#include <xf86drm.h>
++#include <stdio.h>
++#include <sys/stat.h>
++
++#define CIK_TILE_MODE_COLOR_2D			14
++
++#define CIK__GB_TILE_MODE__PIPE_CONFIG(x)        (((x) >> 6) & 0x1f)
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P2               0
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16          4
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16         5
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32         6
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32         7
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16    8
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16    9
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16    10
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16   11
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16   12
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32   13
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32   14
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16   16
++#define     CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16  17
++
++static struct util_hash_table *fd_tab = NULL;
++pipe_static_mutex(fd_tab_mutex);
++/* Number of tile pipes from PIPE_CONFIG of the 2D color tile mode (2 if unknown). */
++static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
++{
++   unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
++
++   switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
++   case CIK__PIPE_CONFIG__ADDR_SURF_P2:
++   default:
++       return 2;
++   case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
++       return 4;
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
++       return 8;
++   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
++   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
++       return 16;
++   }
++}
++
++/* Convert Sea Islands register values GB_ADDR_CFG and MC_ARB_RAMCFG
++ * into GB_TILING_CONFIG register which is only present on R600-R700. */
++static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
++{
++   unsigned tiling_config;
++
++   tiling_config  = info->gb_addr_cfg & 0x7;                  /* num_pipes */
++   tiling_config |= (info->mc_arb_ramcfg & 0x3) << 4;         /* num_banks */
++   tiling_config |= ((info->gb_addr_cfg >> 4) & 0x7) << 8;    /* pipe_interleave_bytes */
++   tiling_config |= ((info->gb_addr_cfg >> 28) & 0x3) << 12;  /* row_size */
++
++   return tiling_config;
++}
++
++/* Open the device, query GPU/heap/IP/firmware info and fill ws->info; FALSE on any error. */
++static boolean do_winsys_init(struct amdgpu_winsys *ws)
++{
++   struct amdgpu_heap_info vram, gtt;
++   struct drm_amdgpu_info_hw_ip dma, uvd, vce;
++   uint32_t vce_version, vce_feature;
++   int r;
++
++   ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
++
++   r = amdgpu_device_initialize(ws->fd, &ws->info.drm_major,
++                                &ws->info.drm_minor, &ws->dev);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
++      return FALSE; /* no device to deinitialize yet, so the "fail" unwind is skipped */
++   }
++
++   /* Query hardware and driver information. Each failure below unwinds via "fail". */
++   r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_DMA, 0, &dma);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_UVD, 0, &uvd);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_VCE, 0, &vce);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_query_firmware_version(ws->dev, AMDGPU_INFO_FW_VCE, 0, 0,
++				     &vce_version, &vce_feature);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
++      goto fail;
++   }
++
++   r = amdgpu_cs_ctx_create(ws->dev, &ws->ctx);
++   if (r) {
++      fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed.\n");
++      goto fail;
++   }
++
++   /* Set chip identification. */
++   ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
++
++   switch (ws->info.pci_id) {
++#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; break;
++#include "pci_ids/radeonsi_pci_ids.h"
++#undef CHIPSET
++
++   default:
++      fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
++      goto fail;
++   }
++   /* Families are classified by enum order: >= TONGA is VI, >= BONAIRE is CIK. */
++   if (ws->info.family >= CHIP_TONGA)
++      ws->info.chip_class = VI;
++   else if (ws->info.family >= CHIP_BONAIRE)
++      ws->info.chip_class = CIK;
++   else {
++      fprintf(stderr, "amdgpu: Unknown family.\n");
++      goto fail;
++   }
++
++   /* LLVM 3.6 is required for VI. */
++   if (ws->info.chip_class >= VI && HAVE_LLVM < 0x0306) {
++      fprintf(stderr, "amdgpu: LLVM 3.6 is required, got LLVM %i.%i.\n",
++              HAVE_LLVM >> 8, HAVE_LLVM & 255);
++      goto fail;
++   }
++
++   /* Set hardware information. */
++   ws->info.gart_size = gtt.heap_size;
++   ws->info.vram_size = vram.heap_size;
++   /* convert the shader clock from KHz to MHz */
++   ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
++   ws->info.max_compute_units = 1; /* TODO */
++   ws->info.max_se = ws->amdinfo.num_shader_engines;
++   ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
++   ws->info.has_uvd = uvd.available_rings != 0;
++   ws->info.vce_fw_version =
++         vce.available_rings ? vce_version : 0;
++   ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
++   ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
++   ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
++   ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
++   ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
++   ws->info.r600_virtual_address = TRUE;
++   ws->info.r600_has_dma = dma.available_rings != 0;
++
++   memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
++          sizeof(ws->amdinfo.gb_tile_mode));
++   ws->info.si_tile_mode_array_valid = TRUE;
++   ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
++
++   memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
++          sizeof(ws->amdinfo.gb_macro_tile_mode));
++   ws->info.cik_macrotile_mode_array_valid = TRUE;
++
++   return TRUE;
++
++fail:
++   if (ws->ctx) { /* assumes ws was zero-allocated, so ctx is set only by amdgpu_cs_ctx_create */
++      amdgpu_cs_ctx_free(ws->dev, ws->ctx);
++   }
++   amdgpu_device_deinitialize(ws->dev);
++   ws->dev = NULL; /* NOTE(review): ws->ctx is left dangling here; the caller frees ws */
++   return FALSE;
++}
++
++static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
++{
++   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)rws;
++   /* Stop the submission thread (if any) before its semaphore and lock go away. */
++   if (aws->thread) {
++      aws->kill_thread = 1;
++      pipe_semaphore_signal(&aws->cs_queued);
++      pipe_thread_wait(aws->thread);
++   }
++   pipe_semaphore_destroy(&aws->cs_queued);
++   pipe_mutex_destroy(aws->cs_stack_lock);
++   /* The cache manager was created on top of kman, so it is destroyed first. */
++   aws->cman->destroy(aws->cman);
++   aws->kman->destroy(aws->kman);
++   /* Finally release the context, the device and the winsys itself. */
++   amdgpu_cs_ctx_free(aws->dev, aws->ctx);
++   amdgpu_device_deinitialize(aws->dev);
++   FREE(rws);
++}
++
++static void amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
++{
++   const struct amdgpu_winsys *aws = (const struct amdgpu_winsys *)rws;
++   *info = aws->info; /* struct copy of the info cached in the winsys */
++}
++
++static boolean
++amdgpu_cs_request_feature(struct radeon_winsys_cs *rcs,
++                          enum radeon_feature_id fid, boolean enable)
++{
++   return FALSE; /* every request is refused; none of the arguments is inspected */
++}
++
++static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
++                                   enum radeon_value_id value)
++{
++   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
++   struct amdgpu_heap_info heap;
++   uint64_t retval = 0;
++   /* Counters come from ws; usage is queried live (a failed heap query yields 0). */
++   switch (value) {
++   case RADEON_REQUESTED_VRAM_MEMORY:
++      return ws->allocated_vram;
++   case RADEON_REQUESTED_GTT_MEMORY:
++      return ws->allocated_gtt;
++   case RADEON_BUFFER_WAIT_TIME_NS:
++      return ws->buffer_wait_time;
++   case RADEON_TIMESTAMP:
++      amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
++      return retval;
++   case RADEON_NUM_CS_FLUSHES:
++      return ws->num_cs_flushes;
++   case RADEON_NUM_BYTES_MOVED:
++      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
++      return retval;
++   case RADEON_VRAM_USAGE:
++      return amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap) ?
++             0 : heap.heap_usage;
++   case RADEON_GTT_USAGE:
++      return amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap) ?
++             0 : heap.heap_usage;
++   }
++   return 0;
++}
++
++static unsigned hash_fd(void *key)
++{
++   struct stat st;
++   /* Hash the file identity; if fstat() fails 'st' is unset, so use bucket 0. */
++   if (fstat(pointer_to_intptr(key), &st) != 0)
++      return 0;
++   return st.st_dev ^ st.st_ino ^ st.st_rdev;
++}
++
++static int compare_fd(void *key1, void *key2)
++{
++   struct stat stat1, stat2;
++
++   /* 0 means "same file". An fd that cannot be stat'ed never matches. */
++   if (fstat(pointer_to_intptr(key1), &stat1) != 0 ||
++       fstat(pointer_to_intptr(key2), &stat2) != 0)
++      return 1;
++   return stat1.st_dev != stat2.st_dev ||
++          stat1.st_ino != stat2.st_ino ||
++          stat1.st_rdev != stat2.st_rdev;
++}
++
++void amdgpu_ws_queue_cs(struct amdgpu_winsys *ws, struct amdgpu_cs *cs)
++{
++   /* Spin until a slot is free; the loop exits with cs_stack_lock held. */
++   for (;;) {
++      pipe_mutex_lock(ws->cs_stack_lock);
++      if (ws->num_enqueued_cs < RING_LAST)
++         break;
++      pipe_mutex_unlock(ws->cs_stack_lock); /* no room left for a flush */
++   }
++   ws->cs_stack[ws->num_enqueued_cs++] = cs;
++   pipe_mutex_unlock(ws->cs_stack_lock);
++   pipe_semaphore_signal(&ws->cs_queued);
++}
++
++static PIPE_THREAD_ROUTINE(amdgpu_cs_emit_ioctl, param)
++{
++   struct amdgpu_winsys *ws = (struct amdgpu_winsys *)param;
++   struct amdgpu_cs *cs;
++   unsigned i;
++   /* Thread body: drains ws->cs_stack front to back until kill_thread is set. */
++   while (1) {
++      pipe_semaphore_wait(&ws->cs_queued);
++      if (ws->kill_thread)
++         break;
++      /* Pop the oldest entry (index 0) and shift the remaining ones down. */
++      pipe_mutex_lock(ws->cs_stack_lock);
++      cs = ws->cs_stack[0];
++      for (i = 1; i < ws->num_enqueued_cs; i++)
++         ws->cs_stack[i - 1] = ws->cs_stack[i];
++      ws->cs_stack[--ws->num_enqueued_cs] = NULL; /* NOTE(review): assumes the stack is non-empty here */
++      pipe_mutex_unlock(ws->cs_stack_lock);
++      /* Submit with the lock released, then signal cs->flush_completed. */
++      if (cs) {
++         amdgpu_cs_emit_ioctl_oneshot(cs, cs->cst);
++         pipe_semaphore_signal(&cs->flush_completed);
++      }
++   }
++   pipe_mutex_lock(ws->cs_stack_lock); /* exiting: entries still queued are signalled without being submitted */
++   for (i = 0; i < ws->num_enqueued_cs; i++) {
++      pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed);
++      ws->cs_stack[i] = NULL;
++   }
++   ws->num_enqueued_cs = 0;
++   pipe_mutex_unlock(ws->cs_stack_lock);
++   return 0;
++}
++
++DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) /* presumably defines debug_get_option_thread() - confirm */
++static PIPE_THREAD_ROUTINE(amdgpu_cs_emit_ioctl, param); /* NOTE(review): redundant, the routine is defined above */
++
++static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
++{
++   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)ws;
++   bool last_ref;
++
++   /* Drop one reference and, if it was the last, take the fd out of the
++    * table. Both steps run under fd_tab_mutex so that a concurrent
++    * amdgpu_winsys_create cannot fetch a winsys whose counter has
++    * already dropped to 0. */
++   pipe_mutex_lock(fd_tab_mutex);
++
++   last_ref = pipe_reference(&aws->reference, NULL);
++   if (last_ref && fd_tab)
++      util_hash_table_remove(fd_tab, intptr_to_pointer(aws->fd));
++
++   pipe_mutex_unlock(fd_tab_mutex);
++   return last_ref;
++}
++
++/* Return the winsys for fd, creating it on first use and taking a reference
++ * otherwise; NULL if fd is not an amdgpu device (DRM major 3) or setup fails. */
++struct radeon_winsys *
++      amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
++{
++   struct amdgpu_winsys *ws;
++   drmVersionPtr version = drmGetVersion(fd);
++   int drm_major;
++
++   /* drmGetVersion can fail. The DRM driver version of amdgpu is 3.x.x. */
++   if (!version)
++      return NULL;
++   drm_major = version->version_major;
++   drmFreeVersion(version);
++   if (drm_major != 3)
++      return NULL;
++
++   /* Look up the winsys from the fd table. */
++   pipe_mutex_lock(fd_tab_mutex);
++   if (!fd_tab)
++      fd_tab = util_hash_table_create(hash_fd, compare_fd);
++
++   ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
++   if (ws) {
++      pipe_reference(NULL, &ws->reference);
++      pipe_mutex_unlock(fd_tab_mutex);
++      return &ws->base;
++   }
++
++   ws = CALLOC_STRUCT(amdgpu_winsys);
++   if (!ws) {
++      pipe_mutex_unlock(fd_tab_mutex);
++      return NULL;
++   }
++
++   ws->fd = fd;
++   if (!do_winsys_init(ws))
++      goto fail;
++
++   /* Create managers. */
++   ws->kman = amdgpu_bomgr_create(ws);
++   if (!ws->kman)
++      goto fail;
++   ws->cman = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0,
++			(ws->info.vram_size + ws->info.gart_size) / 8);
++   if (!ws->cman)
++      goto fail;
++
++   /* init reference */
++   pipe_reference_init(&ws->reference, 1);
++
++   /* Set functions. */
++   ws->base.unref = amdgpu_winsys_unref;
++   ws->base.destroy = amdgpu_winsys_destroy;
++   ws->base.query_info = amdgpu_winsys_query_info;
++   ws->base.cs_request_feature = amdgpu_cs_request_feature;
++   ws->base.query_value = amdgpu_query_value;
++
++   amdgpu_bomgr_init_functions(ws);
++   amdgpu_cs_init_functions(ws);
++
++   pipe_mutex_init(ws->cs_stack_lock);
++   ws->num_enqueued_cs = 0;
++   pipe_semaphore_init(&ws->cs_queued, 0);
++   if (ws->num_cpus > 1 && debug_get_option_thread())
++      ws->thread = pipe_thread_create(amdgpu_cs_emit_ioctl, ws);
++
++   /* Create the screen at the end. The winsys must be initialized
++    * completely. Alternatively, we could create the screen based on
++    * "ws->gen" and link all drivers into one binary blob. */
++   ws->base.screen = screen_create(&ws->base);
++   if (!ws->base.screen) {
++      amdgpu_winsys_destroy(&ws->base);
++      pipe_mutex_unlock(fd_tab_mutex);
++      return NULL;
++   }
++
++   util_hash_table_set(fd_tab, intptr_to_pointer(fd), ws);
++   /* Unlock only now, so that other threads creating the winsys from the
++    * same fd get a fully initialized winsys, not a half-way initialized one. */
++   pipe_mutex_unlock(fd_tab_mutex);
++   return &ws->base;
++
++fail:
++   pipe_mutex_unlock(fd_tab_mutex);
++   if (ws->cman)
++      ws->cman->destroy(ws->cman);
++   if (ws->kman)
++      ws->kman->destroy(ws->kman);
++   if (ws->dev) { /* still set only when do_winsys_init succeeded: release ctx + device */
++      amdgpu_cs_ctx_free(ws->dev, ws->ctx);
++      amdgpu_device_deinitialize(ws->dev);
++   }
++   FREE(ws);
++   return NULL;
++}
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h
+new file mode 100644
+index 0000000..fc27f1c
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h
+@@ -0,0 +1,80 @@
++/*
++ * Copyright © 2009 Corbin Simpson
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Corbin Simpson <MostAwesomeDude@gmail.com>
++ */
++#ifndef AMDGPU_DRM_WINSYS_H
++#define AMDGPU_DRM_WINSYS_H
++
++#include "../radeon_winsys.h"
++#include "os/os_thread.h"
++#include <amdgpu.h>
++
++struct amdgpu_cs;
++
++struct amdgpu_winsys {
++   struct radeon_winsys base; /* first member: radeon_winsys pointers are cast to amdgpu_winsys */
++   struct pipe_reference reference; /* dropped in amdgpu_winsys_unref */
++
++   int fd; /* DRM file descriptor */
++   amdgpu_device_handle dev;
++   /* This only affects the order in which IBs are executed. */
++   amdgpu_context_handle ctx;
++
++   int num_cs; /* The number of command streams created. */
++   uint64_t allocated_vram;
++   uint64_t allocated_gtt;
++   uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
++   uint64_t num_cs_flushes;
++
++   struct radeon_info info; /* filled by do_winsys_init, copied out by query_info */
++
++   struct pb_manager *kman; /* from amdgpu_bomgr_create */
++   struct pb_manager *cman; /* pb_cache_manager layered on kman */
++
++   uint32_t num_cpus;      /* Number of CPUs. */
++
++   /* rings submission thread */
++   pipe_mutex cs_stack_lock; /* guards cs_stack and num_enqueued_cs */
++   pipe_semaphore cs_queued; /* signalled once per queued CS and on shutdown */
++   pipe_thread thread;
++   int kill_thread; /* set to 1 to make the submission thread exit */
++   int num_enqueued_cs;
++   struct amdgpu_cs *cs_stack[RING_LAST]; /* consumed from index 0 */
++
++   struct amdgpu_gpu_info amdinfo; /* raw result of amdgpu_query_gpu_info */
++};
++
++/* Downcast helper: a plain pointer cast from the base winsys type. */
++static INLINE struct amdgpu_winsys *amdgpu_winsys(struct radeon_winsys *base)
++{
++   return (struct amdgpu_winsys *)base;
++}
++
++void amdgpu_ws_queue_cs(struct amdgpu_winsys *ws, struct amdgpu_cs *cs);
++
++#endif
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+index 12767bf..a312f03 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+@@ -34,6 +34,7 @@
+ #include "radeon_drm_bo.h"
+ #include "radeon_drm_cs.h"
+ #include "radeon_drm_public.h"
++#include "../amdgpu/amdgpu_public.h"
+ 
+ #include "pipebuffer/pb_bufmgr.h"
+ #include "util/u_memory.h"
+@@ -643,6 +644,13 @@ PUBLIC struct radeon_winsys *
+ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
+ {
+     struct radeon_drm_winsys *ws;
++    struct radeon_winsys *amdgpu;
++
++    /* First, try amdgpu. */
++    amdgpu = amdgpu_winsys_create(fd, screen_create);
++    if (amdgpu) {
++        return amdgpu;
++    }
+ 
+     pipe_mutex_lock(fd_tab_mutex);
+     if (!fd_tab) {
+diff --git a/src/gallium/winsys/radeon/radeon_winsys.h b/src/gallium/winsys/radeon/radeon_winsys.h
+index 7fb7ac9..a3cb273 100644
+--- a/src/gallium/winsys/radeon/radeon_winsys.h
++++ b/src/gallium/winsys/radeon/radeon_winsys.h
+@@ -136,6 +136,9 @@ enum radeon_family {
+     CHIP_KABINI,
+     CHIP_HAWAII,
+     CHIP_MULLINS,
++    CHIP_TONGA,
++    CHIP_ICELAND,
++    CHIP_CARRIZO,
+     CHIP_LAST,
+ };
+ 
+@@ -150,6 +153,7 @@ enum chip_class {
+     CAYMAN,
+     SI,
+     CIK,
++    VI,
+ };
+ 
+ enum ring_type {
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0009-winsys-amdgpu-add-addrlib-texture-addressing-and-ali.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0009-winsys-amdgpu-add-addrlib-texture-addressing-and-ali.patch
new file mode 100644
index 00000000..0ca8a819
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0009-winsys-amdgpu-add-addrlib-texture-addressing-and-ali.patch
@@ -0,0 +1,22649 @@
+From dbeaed6cf049a0be97631ab74afa1f4ab9a800bf Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 19:41:33 +0200
+Subject: [PATCH 09/29] winsys/amdgpu: add addrlib - texture addressing and
+ alignment calculator
+
+This is an internal project that Catalyst uses and now open source will do
+too.
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/winsys/radeon/amdgpu/Makefile.am       |    7 +-
+ src/gallium/winsys/radeon/amdgpu/Makefile.sources  |   23 +
+ .../winsys/radeon/amdgpu/addrlib/addrinterface.cpp | 1008 +++++
+ .../winsys/radeon/amdgpu/addrlib/addrinterface.h   | 2166 +++++++++
+ .../winsys/radeon/amdgpu/addrlib/addrtypes.h       |  590 +++
+ .../winsys/radeon/amdgpu/addrlib/core/addrcommon.h |  558 +++
+ .../radeon/amdgpu/addrlib/core/addrelemlib.cpp     | 1678 +++++++
+ .../radeon/amdgpu/addrlib/core/addrelemlib.h       |  270 ++
+ .../winsys/radeon/amdgpu/addrlib/core/addrlib.cpp  | 4028 +++++++++++++++++
+ .../winsys/radeon/amdgpu/addrlib/core/addrlib.h    |  695 +++
+ .../radeon/amdgpu/addrlib/core/addrobject.cpp      |  246 ++
+ .../winsys/radeon/amdgpu/addrlib/core/addrobject.h |   89 +
+ .../amdgpu/addrlib/inc/chip/r800/si_gb_reg.h       |  155 +
+ .../radeon/amdgpu/addrlib/inc/lnx_common_defs.h    |  129 +
+ .../addrlib/r800/chip/si_ci_vi_merged_enum.h       |   40 +
+ .../radeon/amdgpu/addrlib/r800/ciaddrlib.cpp       | 1777 ++++++++
+ .../winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.h  |  197 +
+ .../radeon/amdgpu/addrlib/r800/egbaddrlib.cpp      | 4578 ++++++++++++++++++++
+ .../winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.h |  411 ++
+ .../radeon/amdgpu/addrlib/r800/siaddrlib.cpp       | 2818 ++++++++++++
+ .../winsys/radeon/amdgpu/addrlib/r800/siaddrlib.h  |  262 ++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_id.h       |  157 +
+ src/gallium/winsys/radeon/amdgpu/amdgpu_surface.c  |  436 ++
+ src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c   |   50 +
+ src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h   |    6 +
+ 25 files changed, 22373 insertions(+), 1 deletion(-)
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/addrtypes.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrcommon.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/inc/chip/r800/si_gb_reg.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/inc/lnx_common_defs.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/chip/si_ci_vi_merged_enum.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.cpp
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_id.h
+ create mode 100644 src/gallium/winsys/radeon/amdgpu/amdgpu_surface.c
+
+diff --git a/src/gallium/winsys/radeon/amdgpu/Makefile.am b/src/gallium/winsys/radeon/amdgpu/Makefile.am
+index 80ecb75..a719913 100644
+--- a/src/gallium/winsys/radeon/amdgpu/Makefile.am
++++ b/src/gallium/winsys/radeon/amdgpu/Makefile.am
+@@ -3,7 +3,12 @@ include $(top_srcdir)/src/gallium/Automake.inc
+ 
+ AM_CFLAGS = \
+ 	$(GALLIUM_WINSYS_CFLAGS) \
+-	$(AMDGPU_CFLAGS)
++	$(AMDGPU_CFLAGS) \
++	-I$(srcdir)/addrlib \
++	-I$(srcdir)/addrlib/core \
++	-I$(srcdir)/addrlib/inc/chip/r800 \
++	-I$(srcdir)/addrlib/r800/chip \
++	-DBRAHMA_BUILD=1
+ 
+ AM_CXXFLAGS = $(AM_CFLAGS)
+ 
+diff --git a/src/gallium/winsys/radeon/amdgpu/Makefile.sources b/src/gallium/winsys/radeon/amdgpu/Makefile.sources
+index 0f55010..6b33841 100644
+--- a/src/gallium/winsys/radeon/amdgpu/Makefile.sources
++++ b/src/gallium/winsys/radeon/amdgpu/Makefile.sources
+@@ -1,8 +1,31 @@
+ C_SOURCES := \
++	addrlib/addrinterface.cpp \
++	addrlib/addrinterface.h \
++	addrlib/addrtypes.h \
++	addrlib/core/addrcommon.h \
++	addrlib/core/addrelemlib.cpp \
++	addrlib/core/addrelemlib.h \
++	addrlib/core/addrlib.cpp \
++	addrlib/core/addrlib.h \
++	addrlib/core/addrobject.cpp \
++	addrlib/core/addrobject.h \
++	addrlib/inc/chip/r800/si_gb_reg.h \
++	addrlib/inc/lnx_common_defs.h \
++	addrlib/r800/chip/si_ci_merged_enum.h \
++	addrlib/r800/chip/si_ci_vi_merged_enum.h \
++	addrlib/r800/chip/si_enum.h \
++	addrlib/r800/ciaddrlib.cpp \
++	addrlib/r800/ciaddrlib.h \
++	addrlib/r800/egbaddrlib.cpp \
++	addrlib/r800/egbaddrlib.h \
++	addrlib/r800/siaddrlib.cpp \
++	addrlib/r800/siaddrlib.h \
+ 	amdgpu_bo.c \
+ 	amdgpu_bo.h \
+ 	amdgpu_cs.c \
+ 	amdgpu_cs.h \
++	amdgpu_id.h \
+ 	amdgpu_public.h \
++	amdgpu_surface.c \
+ 	amdgpu_winsys.c \
+ 	amdgpu_winsys.h
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.cpp
+new file mode 100644
+index 0000000..6556927
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.cpp
+@@ -0,0 +1,1008 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrinterface.cpp
++* @brief Contains the addrlib interface functions
++***************************************************************************************************
++*/
++#include "addrinterface.h"
++#include "addrlib.h"
++
++#include "addrcommon.h"
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Create/Destroy/Config functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrCreate
++*
++*   @brief
++*       Create address lib object
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrCreate(
++    const ADDR_CREATE_INPUT*    pAddrCreateIn,  ///< [in] information for creating address lib object
++    ADDR_CREATE_OUTPUT*         pAddrCreateOut) ///< [out] address lib handle
++{
++    // Thin C entry point: object creation is delegated to AddrLib::Create,
++    // whose result code is handed back to the caller unchanged.
++
++    const ADDR_E_RETURNCODE status = AddrLib::Create(pAddrCreateIn, pAddrCreateOut);
++    return status;
++}
++
++
++
++/**
++***************************************************************************************************
++*   AddrDestroy
++*
++*   @brief
++*       Destroy address lib object
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrDestroy(
++    ADDR_HANDLE hLib) ///< [in] address lib handle
++{
++    // A null handle has nothing to destroy and is reported as an error.
++    if (!hLib)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Look up the library object behind the handle and let it destroy
++    // itself; whatever Destroy() returns is not propagated.
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    pAddrLib->Destroy();
++
++    return ADDR_OK;
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                    Surface functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrComputeSurfaceInfo
++*
++*   @brief
++*       Calculate surface width/height/depth/alignments and suitable tiling mode
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
++    ADDR_HANDLE                             hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,  ///< [in] surface information
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut) ///< [out] surface parameters and alignments
++{
++    // Resolve the handle to its AddrLib object first. If that yields NULL
++    // the request cannot be serviced, so ADDR_ERROR is returned without
++    // touching pOut.
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Otherwise forward the call and hand its result code back unchanged.
++    const ADDR_E_RETURNCODE status = pAddrLib->ComputeSurfaceInfo(pIn, pOut);
++
++    return status;
++}
++
++
++
++/**
++***************************************************************************************************
++*   AddrComputeSurfaceAddrFromCoord
++*
++*   @brief
++*       Compute surface address according to coordinates
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,  ///< [in] surface info and coordinates
++    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] surface address
++{
++    // Resolve the handle to its AddrLib object first. If that yields NULL
++    // the request cannot be serviced, so ADDR_ERROR is returned without
++    // touching pOut.
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Otherwise forward the call and hand its result code back unchanged.
++    const ADDR_E_RETURNCODE status = pAddrLib->ComputeSurfaceAddrFromCoord(pIn, pOut);
++
++    return status;
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeSurfaceCoordFromAddr
++*
++*   @brief
++*       Compute coordinates according to surface address
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,  ///< [in] surface info and address
++    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut) ///< [out] coordinates
++{
++    // Resolve the handle to its AddrLib object first. If that yields NULL
++    // the request cannot be serviced, so ADDR_ERROR is returned without
++    // touching pOut.
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Otherwise forward the call and hand its result code back unchanged.
++    const ADDR_E_RETURNCODE status = pAddrLib->ComputeSurfaceCoordFromAddr(pIn, pOut);
++
++    return status;
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                   HTile functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrComputeHtileInfo
++*
++*   @brief
++*       Compute Htile pitch, height, base alignment and size in bytes
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
++    ADDR_HANDLE                             hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,  ///< [in] Htile information
++    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut) ///< [out] Htile pitch, height and size in bytes
++{
++    // Resolve the handle to its AddrLib object first. If that yields NULL
++    // the request cannot be serviced, so ADDR_ERROR is returned without
++    // touching pOut.
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Otherwise forward the call and hand its result code back unchanged.
++    const ADDR_E_RETURNCODE status = pAddrLib->ComputeHtileInfo(pIn, pOut);
++
++    return status;
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeHtileAddrFromCoord
++*
++*   @brief
++*       Compute Htile address according to coordinates (of depth buffer)
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Htile info and coordinates
++    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] Htile address
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeHtileAddrFromCoord() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeHtileAddrFromCoord(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeHtileCoordFromAddr
++*
++*   @brief
++*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
++*       Htile address
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,  ///< [in] Htile info and address
++    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut) ///< [out] Htile coordinates
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeHtileCoordFromAddr() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeHtileCoordFromAddr(pIn, pOut);
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                     C-mask functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrComputeCmaskInfo
++*
++*   @brief
++*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
++*       info
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
++    ADDR_HANDLE                             hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,  ///< [in] Cmask pitch and height
++    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut) ///< [out] Cmask pitch, height and size in bytes
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeCmaskInfo() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeCmaskInfo(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeCmaskAddrFromCoord
++*
++*   @brief
++*       Compute Cmask address according to coordinates (of MSAA color buffer)
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Cmask info and coordinates
++    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] Cmask address
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeCmaskAddrFromCoord() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeCmaskAddrFromCoord(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeCmaskCoordFromAddr
++*
++*   @brief
++*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
++*       Cmask address
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,  ///< [in] Cmask info and address
++    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut) ///< [out] Cmask coordinates
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeCmaskCoordFromAddr() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeCmaskCoordFromAddr(pIn, pOut);
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                     F-mask functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrComputeFmaskInfo
++*
++*   @brief
++*       Compute Fmask pitch/height/depth/alignments and size in bytes
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
++    ADDR_HANDLE                             hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,  ///< [in] Fmask information
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut) ///< [out] Fmask pitch and height
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeFmaskInfo() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeFmaskInfo(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeFmaskAddrFromCoord
++*
++*   @brief
++*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Fmask info and coordinates
++    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] Fmask address
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeFmaskAddrFromCoord() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeFmaskAddrFromCoord(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeFmaskCoordFromAddr
++*
++*   @brief
++*       Compute coordinates (x,y,slice,sample,plane) according to Fmask address
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
++    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
++    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,  ///< [in] Fmask info and address
++    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut) ///< [out] Fmask coordinates
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeFmaskCoordFromAddr() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeFmaskCoordFromAddr(pIn, pOut);
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                     DCC key functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrComputeDccInfo
++*
++*   @brief
++*       Compute DCC key size, base alignment based on color surface size, tile info or tile index
++*
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
++    ADDR_HANDLE                             hLib,   ///< [in] handle of addrlib
++    const ADDR_COMPUTE_DCCINFO_INPUT*       pIn,    ///< [in] input
++    ADDR_COMPUTE_DCCINFO_OUTPUT*            pOut)   ///< [out] output
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeDccInfo() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeDccInfo(pIn, pOut);
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////
++// Below functions are element related or helper functions
++///////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrGetVersion
++*
++*   @brief
++*       Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION
++*       defined in addrinterface.h to see if there is a mismatch.
++***************************************************************************************************
++*/
++UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib)
++{
++    AddrLib* const pLib = AddrLib::GetAddrLib(hLib);
++
++    // ADDR_ASSERT flags a missing library object; the explicit NULL check
++    // below still keeps the call from dereferencing it and yields 0.
++    ADDR_ASSERT(pLib != NULL);
++
++    if (pLib == NULL)
++    {
++        return 0;
++    }
++
++    return pLib->GetVersion();
++}
++
++/**
++***************************************************************************************************
++*   AddrUseTileIndex
++*
++*   @brief
++*       Return TRUE if tileIndex is enabled in this address library
++***************************************************************************************************
++*/
++BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib)
++{
++    AddrLib* const pLib = AddrLib::GetAddrLib(hLib);
++
++    // ADDR_ASSERT flags a missing library object; the explicit NULL check
++    // below still keeps the call from dereferencing it and yields FALSE.
++    ADDR_ASSERT(pLib != NULL);
++
++    if (pLib == NULL)
++    {
++        return FALSE;
++    }
++
++    return pLib->UseTileIndex(0);
++}
++
++/**
++***************************************************************************************************
++*   AddrUseCombinedSwizzle
++*
++*   @brief
++*       Return TRUE if combined swizzle is enabled in this address library
++***************************************************************************************************
++*/
++BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib)
++{
++    AddrLib* const pLib = AddrLib::GetAddrLib(hLib);
++
++    // ADDR_ASSERT flags a missing library object; the explicit NULL check
++    // below still keeps the call from dereferencing it and yields FALSE.
++    ADDR_ASSERT(pLib != NULL);
++
++    if (pLib == NULL)
++    {
++        return FALSE;
++    }
++
++    return pLib->UseCombinedSwizzle();
++}
++
++/**
++***************************************************************************************************
++*   AddrExtractBankPipeSwizzle
++*
++*   @brief
++*       Extract Bank and Pipe swizzle from base256b
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
++    ADDR_HANDLE                                 hLib,     ///< [in] addrlib handle
++    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,      ///< [in] input structure
++    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut)     ///< [out] output structure
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ExtractBankPipeSwizzle() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ExtractBankPipeSwizzle(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrCombineBankPipeSwizzle
++*
++*   @brief
++*       Combine Bank and Pipe swizzle
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
++    ADDR_HANDLE                                 hLib,
++    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
++    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut)
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to CombineBankPipeSwizzle() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->CombineBankPipeSwizzle(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeSliceSwizzle
++*
++*   @brief
++*       Compute a swizzle for slice from a base swizzle
++*   @return
++*       ADDR_OK if no error
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
++    ADDR_HANDLE                                 hLib,
++    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*      pIn,
++    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*           pOut)
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through unchanged.
++    // Note the library-side method is named ComputeSliceTileSwizzle().
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeSliceTileSwizzle(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputeBaseSwizzle
++*
++*   @brief
++*       Return a combined bank and pipe swizzle based on surface type/index
++*   @return
++*       ADDR_OK if no error
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
++    ADDR_HANDLE                             hLib,
++    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
++    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut)
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputeBaseSwizzle() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputeBaseSwizzle(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   ElemFlt32ToDepthPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a depth/stencil pixel value
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++*
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
++    ADDR_HANDLE                         hLib,    ///< [in] addrlib handle
++    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,     ///< [in] per-component value
++    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut)    ///< [out] final pixel value
++{
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // No library object for this handle: fail before any conversion runs.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // NOTE(review): whatever Flt32ToDepthPixel() returns is discarded, so a
++    // resolvable handle always reports ADDR_OK here -- confirm against the
++    // AddrLib declaration whether a status should be propagated instead.
++    pAddrLib->Flt32ToDepthPixel(pIn, pOut);
++
++    return ADDR_OK;
++}
++
++/**
++***************************************************************************************************
++*   ElemFlt32ToColorPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++*
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
++    ADDR_HANDLE                         hLib,    ///< [in] addrlib handle
++    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,     ///< [in] format, surface number and swap value
++    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut)    ///< [out] final pixel value
++{
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // No library object for this handle: fail before any conversion runs.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // NOTE(review): whatever Flt32ToColorPixel() returns is discarded, so a
++    // resolvable handle always reports ADDR_OK here -- confirm against the
++    // AddrLib declaration whether a status should be propagated instead.
++    pAddrLib->Flt32ToColorPixel(pIn, pOut);
++
++    return ADDR_OK;
++}
++
++/**
++***************************************************************************************************
++*   ElemGetExportNorm
++*
++*   @brief
++*       Helper function to check whether a format can be EXPORT_NORM,
++*       which is a register CB_COLOR_INFO.SURFACE_FORMAT.
++*       FP16 can be reported as EXPORT_NORM for rv770 in r600
++*       family
++*
++***************************************************************************************************
++*/
++BOOL_32 ADDR_API ElemGetExportNorm(
++    ADDR_HANDLE                     hLib, ///< [in] addrlib handle
++    const ELEM_GETEXPORTNORM_INPUT* pIn)  ///< [in] input structure
++{
++    // Start from the failure state; a usable handle flips both values below.
++    ADDR_E_RETURNCODE returnCode = ADDR_ERROR;
++    BOOL_32 isExportNorm = FALSE;
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    if (pAddrLib != NULL)
++    {
++        isExportNorm = pAddrLib->GetExportNorm(pIn);
++        returnCode = ADDR_OK;
++    }
++
++    // returnCode exists only to feed this assertion: it is still ADDR_ERROR
++    // when no library object was found, and FALSE is returned in that case.
++    ADDR_ASSERT(returnCode == ADDR_OK);
++
++    return isExportNorm;
++}
++
++/**
++***************************************************************************************************
++*   AddrConvertTileInfoToHW
++*
++*   @brief
++*       Convert tile info from real value to hardware register value
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
++    ADDR_HANDLE                             hLib, ///< [in] address lib handle
++    const ADDR_CONVERT_TILEINFOTOHW_INPUT*  pIn,  ///< [in] tile info with real value
++    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*       pOut) ///< [out] tile info with HW register value
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ConvertTileInfoToHW() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ConvertTileInfoToHW(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrConvertTileIndex
++*
++*   @brief
++*       Convert tile index to tile mode/type/info
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
++    ADDR_HANDLE                          hLib, ///< [in] address lib handle
++    const ADDR_CONVERT_TILEINDEX_INPUT*  pIn,  ///< [in] input - tile index
++    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut) ///< [out] tile mode/type/info
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ConvertTileIndex() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ConvertTileIndex(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrConvertTileIndex1
++*
++*   @brief
++*       Convert tile index to tile mode/type/info
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
++    ADDR_HANDLE                          hLib, ///< [in] address lib handle
++    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,  ///< [in] input - tile index
++    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut) ///< [out] tile mode/type/info
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ConvertTileIndex1() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ConvertTileIndex1(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrGetTileIndex
++*
++*   @brief
++*       Get tile index from tile mode/type/info
++*
++*   @return
++*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
++*
++*   @note
++*       Only meaningful for SI (and above)
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
++    ADDR_HANDLE                     hLib,
++    const ADDR_GET_TILEINDEX_INPUT* pIn,
++    ADDR_GET_TILEINDEX_OUTPUT*      pOut)
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to GetTileIndex() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->GetTileIndex(pIn, pOut);
++}
++
++/**
++***************************************************************************************************
++*   AddrComputePrtInfo
++*
++*   @brief
++*       Interface function for ComputePrtInfo
++*
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
++    ADDR_HANDLE                 hLib,
++    const ADDR_PRT_INFO_INPUT*  pIn,
++    ADDR_PRT_INFO_OUTPUT*       pOut)
++{
++    // Thin wrapper: resolve the handle, then pass pIn/pOut through
++    // unchanged to ComputePrtInfo() on the library object.
++
++    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
++
++    // A NULL result from GetAddrLib() leaves nothing to forward to, so
++    // report a generic error without touching pOut.
++    if (pAddrLib == NULL)
++    {
++        return ADDR_ERROR;
++    }
++
++    // Relay the library's return code to the caller as-is.
++    return pAddrLib->ComputePrtInfo(pIn, pOut);
++}
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.h b/src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.h
+new file mode 100644
+index 0000000..03fbf2b
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/addrinterface.h
+@@ -0,0 +1,2166 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrinterface.h
++* @brief Contains the addrlib interfaces declaration and parameter defines
++***************************************************************************************************
++*/
++#ifndef __ADDR_INTERFACE_H__
++#define __ADDR_INTERFACE_H__
++
++#if defined(__cplusplus)
++extern "C"
++{
++#endif
++
++#include "addrtypes.h"
++
++#define ADDRLIB_VERSION_MAJOR 5  ///< Shifted left by 16 when packed into ADDRLIB_VERSION
++#define ADDRLIB_VERSION_MINOR 25 ///< OR'ed into the low bits of ADDRLIB_VERSION
++#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR)
++
++/// Untyped library handle; virtually all interface functions need it as first parameter
++typedef VOID*   ADDR_HANDLE;
++
++/// Untyped client handle used in callbacks (carried as hClient in the callback inputs)
++typedef VOID*   ADDR_CLIENT_HANDLE;
++
++/**
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                                  Callback functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*    typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
++*         const ADDR_ALLOCSYSMEM_INPUT* pInput);
++*    typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
++*         const ADDR_FREESYSMEM_INPUT* pInput);
++*    typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
++*         const ADDR_DEBUGPRINT_INPUT* pInput);
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                               Create/Destroy/Config functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     AddrCreate()
++*     AddrDestroy()
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                                  Surface functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     AddrComputeSurfaceInfo()
++*     AddrComputeSurfaceAddrFromCoord()
++*     AddrComputeSurfaceCoordFromAddr()
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                                   HTile functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     AddrComputeHtileInfo()
++*     AddrComputeHtileAddrFromCoord()
++*     AddrComputeHtileCoordFromAddr()
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                                   C-mask functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     AddrComputeCmaskInfo()
++*     AddrComputeCmaskAddrFromCoord()
++*     AddrComputeCmaskCoordFromAddr()
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                                   F-mask functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     AddrComputeFmaskInfo()
++*     AddrComputeFmaskAddrFromCoord()
++*     AddrComputeFmaskCoordFromAddr()
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                               Element/Utility functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     ElemFlt32ToDepthPixel()
++*     ElemFlt32ToColorPixel()
++*     AddrExtractBankPipeSwizzle()
++*     AddrCombineBankPipeSwizzle()
++*     AddrComputeSliceSwizzle()
++*     AddrConvertTileInfoToHW()
++*     AddrConvertTileIndex()
++*     AddrConvertTileIndex1()
++*     AddrGetTileIndex()
++*     AddrComputeBaseSwizzle()
++*     AddrUseTileIndex()
++*     AddrUseCombinedSwizzle()
++*
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++* //                                    Dump functions
++* /////////////////////////////////////////////////////////////////////////////////////////////////
++*     AddrDumpSurfaceInfo()
++*     AddrDumpFmaskInfo()
++*     AddrDumpCmaskInfo()
++*     AddrDumpHtileInfo()
++*
++**/
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                      Callback functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++* @brief Alloc system memory flags.
++* @note These flags are reserved for future use; reserving them now minimizes the impact on
++*       the client if flags are added later.
++***************************************************************************************************
++*/
++typedef union _ADDR_ALLOCSYSMEM_FLAGS
++{
++    struct
++    {
++        UINT_32 reserved    : 32;  ///< Reserved for future use.
++    } fields;
++    UINT_32 value;                 ///< The same flag bits accessed as a single UINT_32
++
++} ADDR_ALLOCSYSMEM_FLAGS;
++
++/**
++***************************************************************************************************
++* @brief Input structure passed (by const pointer) to the ADDR_ALLOCSYSMEM callback
++***************************************************************************************************
++*/
++typedef struct _ADDR_ALLOCSYSMEM_INPUT
++{
++    UINT_32                 size;           ///< Size of this structure in bytes
++
++    ADDR_ALLOCSYSMEM_FLAGS  flags;          ///< System memory flags.
++    UINT_32                 sizeInBytes;    ///< System memory allocation size in bytes.
++    ADDR_CLIENT_HANDLE      hClient;        ///< Client handle
++} ADDR_ALLOCSYSMEM_INPUT;
++
++/**
++***************************************************************************************************
++* ADDR_ALLOCSYSMEM
++*   @brief
++*       Callback that allocates system memory as described by pInput. Returns a valid pointer on success.
++***************************************************************************************************
++*/
++typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
++    const ADDR_ALLOCSYSMEM_INPUT* pInput);
++
++/**
++***************************************************************************************************
++* @brief Input structure passed to the ADDR_FREESYSMEM callback
++***************************************************************************************************
++*/
++typedef struct _ADDR_FREESYSMEM_INPUT
++{
++    UINT_32                 size;           ///< Size of this structure in bytes
++
++    VOID*                   pVirtAddr;      ///< Virtual address of the memory to free
++    ADDR_CLIENT_HANDLE      hClient;        ///< Client handle
++} ADDR_FREESYSMEM_INPUT;
++
++/**
++***************************************************************************************************
++* ADDR_FREESYSMEM
++*   @brief
++*       Callback that frees the system memory described by pInput.
++*       Returns ADDR_OK on success.
++***************************************************************************************************
++*/
++typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
++    const ADDR_FREESYSMEM_INPUT* pInput);
++
++/**
++***************************************************************************************************
++* @brief Input structure passed to the ADDR_DEBUGPRINT callback
++***************************************************************************************************
++*/
++typedef struct _ADDR_DEBUGPRINT_INPUT
++{
++    UINT_32             size;           ///< Size of this structure in bytes
++
++    CHAR*               pDebugString;   ///< Debug print string
++    va_list             ap;             ///< Variable argument list
++    ADDR_CLIENT_HANDLE  hClient;        ///< Client handle
++} ADDR_DEBUGPRINT_INPUT;
++
++/**
++***************************************************************************************************
++* ADDR_DEBUGPRINT
++*   @brief
++*       Callback that prints the debug message described by pInput.
++*       Returns ADDR_OK on success.
++***************************************************************************************************
++*/
++typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
++    const ADDR_DEBUGPRINT_INPUT* pInput);
++
++/**
++***************************************************************************************************
++* ADDR_CALLBACKS
++*
++*   @brief
++*       Client-provided routines: system memory alloc/free and debug message printing.
++***************************************************************************************************
++*/
++typedef struct _ADDR_CALLBACKS
++{
++    ADDR_ALLOCSYSMEM allocSysMem;   ///< Routine to allocate system memory
++    ADDR_FREESYSMEM  freeSysMem;    ///< Routine to free system memory
++    ADDR_DEBUGPRINT  debugPrint;    ///< Routine to print debug message
++} ADDR_CALLBACKS;
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Create/Destroy functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++* ADDR_CREATE_FLAGS
++*
++*   @brief
++*       This structure is used to pass some setup in creation of AddrLib
++*   @note value overlays all of the flag bits below as a single UINT_32.
++***************************************************************************************************
++*/
++typedef union _ADDR_CREATE_FLAGS
++{
++    struct
++    {
++        UINT_32 noCubeMipSlicesPad     : 1;    ///< Turn cubemap faces padding off
++        UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
++                                               ///  output structure
++        UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
++        UINT_32 useCombinedSwizzle     : 1;    ///< Use combined tile swizzle
++        UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
++        UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
++        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes automatically for base level
++        UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
++        UINT_32 reserved               : 24;   ///< Reserved bits for future use
++    };
++
++    UINT_32 value;                             ///< All flag bits as one 32-bit value
++} ADDR_CREATE_FLAGS;
++
++/**
++***************************************************************************************************
++*   ADDR_REGISTER_VALUE
++*
++*   @brief
++*       Data from registers to setup AddrLib global data, used in AddrCreate
++***************************************************************************************************
++*/
++typedef struct _ADDR_REGISTER_VALUE
++{
++    UINT_32  gbAddrConfig;       ///< For R8xx, use GB_ADDR_CONFIG register value.
++                                 ///  For R6xx/R7xx, use GB_TILING_CONFIG.
++                                 ///  But they can be treated as the same.
++                                 ///  If this value is 0, use chip to set default value
++    UINT_32  backendDisables;    ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled.
++                                 ///  Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE
++
++                                 ///  R800 registers-----------------------------------------------
++    UINT_32  noOfBanks;          ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK
++                                 ///  No enums for this value in h/w header files
++                                 ///  0: 4
++                                 ///  1: 8
++                                 ///  2: 16
++    UINT_32  noOfRanks;          ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK
++                                 ///  0: 1
++                                 ///  1: 2
++                                 ///  SI (R1000) registers-----------------------------------------
++    const UINT_32* pTileConfig;  ///< Global tile setting tables
++    UINT_32  noOfEntries;        ///< Number of entries in pTileConfig
++
++                                 ///  CI registers-------------------------------------------------
++    const UINT_32* pMacroTileConfig;    ///< Global macro tile mode table
++    UINT_32  noOfMacroEntries;   ///< Number of entries in pMacroTileConfig
++
++} ADDR_REGISTER_VALUE;
++
++/**
++***************************************************************************************************
++* ADDR_CREATE_INPUT
++*
++*   @brief
++*       Parameters used to create an AddrLib object. Caller must provide all fields.
++*
++***************************************************************************************************
++*/
++typedef struct _ADDR_CREATE_INPUT
++{
++    UINT_32             size;                ///< Size of this structure in bytes
++
++    UINT_32             chipEngine;          ///< Chip Engine
++    UINT_32             chipFamily;          ///< Chip Family
++    UINT_32             chipRevision;        ///< Chip Revision
++    ADDR_CALLBACKS      callbacks;           ///< Callbacks for sysmem alloc/free/print
++    ADDR_CREATE_FLAGS   createFlags;         ///< Flags to setup AddrLib
++    ADDR_REGISTER_VALUE regValue;            ///< Data from registers to setup AddrLib global data
++    ADDR_CLIENT_HANDLE  hClient;             ///< Client handle
++    UINT_32             minPitchAlignPixels; ///< Minimum pitch alignment in pixels
++} ADDR_CREATE_INPUT;
++
++/**
++***************************************************************************************************
++* ADDR_CREATE_OUTPUT
++*
++*   @brief
++*       Return AddrLib handle to client driver
++*
++***************************************************************************************************
++*/
++typedef struct _ADDR_CREATE_OUTPUT
++{
++    UINT_32     size;    ///< Size of this structure in bytes
++
++    ADDR_HANDLE hLib;    ///< Address lib handle
++} ADDR_CREATE_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrCreate
++*
++*   @brief
++*       Create AddrLib object, must be called before any other interface call. The library
++*       handle is returned to the client in pAddrCreateOut->hLib.
++*   @return
++*       ADDR_OK if successful
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrCreate(
++    const ADDR_CREATE_INPUT*    pAddrCreateIn,
++    ADDR_CREATE_OUTPUT*         pAddrCreateOut);
++
++
++
++/**
++***************************************************************************************************
++*   AddrDestroy
++*
++*   @brief
++*       Destroy AddrLib object, must be called to free internally allocated resources.
++*
++*   @return
++*       ADDR_OK if successful
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrDestroy(
++    ADDR_HANDLE hLib);
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                    Surface functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++* @brief
++*       Bank/tiling parameters. On function input, these can be set as desired or
++*       left 0 for AddrLib to calculate/default. On function output, these are the actual
++*       parameters used.
++* @note
++*       Valid bankWidth/bankHeight values:
++*       1,2,4,8. They are factors, not pixels or bytes.
++*
++*       The bank number remains constant across each row of the
++*       macro tile as each pipe is selected, so the number of
++*       tiles in the x direction with the same bank number will
++*       be bank_width * num_pipes.
++***************************************************************************************************
++*/
++typedef struct _ADDR_TILEINFO
++{
++    ///  Any of these parameters can be set to 0 to use the HW default.
++    UINT_32     banks;              ///< Number of banks, numerical value
++    UINT_32     bankWidth;          ///< Number of tiles in the X direction in the same bank
++    UINT_32     bankHeight;         ///< Number of tiles in the Y direction in the same bank
++    UINT_32     macroAspectRatio;   ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1
++    UINT_32     tileSplitBytes;     ///< Tile split size, in bytes
++    AddrPipeCfg pipeConfig;         ///< Pipe Config = HW enum + 1
++} ADDR_TILEINFO;
++
++// Keep the old R800 name as a typedef alias to avoid client changes. "R800" was dropped because
++// SI is planned to be implemented within the 800 HWL; AddrPipeCfg was added to the structure above.
++typedef ADDR_TILEINFO ADDR_R800_TILEINFO;
++
++/**
++***************************************************************************************************
++* @brief
++*       Information needed by quad buffer stereo support
++***************************************************************************************************
++*/
++typedef struct _ADDR_QBSTEREOINFO
++{
++    UINT_32         eyeHeight;          ///< Height (in pixel rows) to right eye
++    UINT_32         rightOffset;        ///< Offset (in bytes) to right eye
++    UINT_32         rightSwizzle;       ///< TileSwizzle for right eye
++} ADDR_QBSTEREOINFO;
++
++/**
++***************************************************************************************************
++*   ADDR_SURFACE_FLAGS
++*
++*   @brief
++*       Surface flags
++***************************************************************************************************
++*/
++typedef union _ADDR_SURFACE_FLAGS
++{
++    struct
++    {
++        UINT_32 color         : 1; ///< Flag indicates this is a color buffer
++        UINT_32 depth         : 1; ///< Flag indicates this is a depth/stencil buffer
++        UINT_32 stencil       : 1; ///< Flag indicates this is a stencil buffer
++        UINT_32 texture       : 1; ///< Flag indicates this is a texture
++        UINT_32 cube          : 1; ///< Flag indicates this is a cubemap
++
++        UINT_32 volume        : 1; ///< Flag indicates this is a volume texture
++        UINT_32 fmask         : 1; ///< Flag indicates this is an fmask
++        UINT_32 cubeAsArray   : 1; ///< Flag indicates whether to treat the cubemap as an array
++        UINT_32 compressZ     : 1; ///< Flag indicates z buffer is compressed
++        UINT_32 overlay       : 1; ///< Flag indicates this is an overlay surface
++        UINT_32 noStencil     : 1; ///< Flag indicates this depth has no separate stencil
++        UINT_32 display       : 1; ///< Flag indicates this should match display controller req.
++        UINT_32 opt4Space     : 1; ///< Flag indicates this surface should be optimized for space
++                                   ///  i.e. save some memory but may lose performance
++        UINT_32 prt           : 1; ///< Flag for partially resident texture
++        UINT_32 qbStereo      : 1; ///< Quad buffer stereo surface
++        UINT_32 pow2Pad       : 1; ///< SI: Pad to pow2, must set for mipmap (include level0)
++        UINT_32 interleaved   : 1; ///< Special flag for interleaved YUV surface padding
++        UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory
++        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
++        UINT_32 dispTileType  : 1; ///< NI: force display Tiling for 128 bit shared resource
++        UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear
++        UINT_32 czDispCompatible: 1; ///< SI+: CZ family (Carrizo) has a HW bug that needs special
++                                     ///<      alignment. This flag indicates we need to follow the
++                                     ///<      alignment of CZ families or other ASICs under PX + CZ.
++        UINT_32 reserved      :10; ///< Reserved bits
++    };
++
++    UINT_32 value;                 ///< All flag bits as one 32-bit value
++} ADDR_SURFACE_FLAGS;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SURFACE_INFO_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeSurfaceInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT
++{
++    UINT_32             size;               ///< Size of this structure in bytes
++
++    AddrTileMode        tileMode;           ///< Tile mode
++    AddrFormat          format;             ///< If format is set to valid one, bpp/width/height
++                                            ///  might be overwritten
++    UINT_32             bpp;                ///< Bits per pixel
++    UINT_32             numSamples;         ///< Number of samples
++    UINT_32             width;              ///< Width, in pixels
++    UINT_32             height;             ///< Height, in pixels
++    UINT_32             numSlices;          ///< Number of surface slices/depth,
++                                            ///  Note:
++                                            ///  For cubemap, driver clients usually set numSlices
++                                            ///  to 1 in per-face calc.
++                                            ///  For 7xx and above, we need to pad faces as slices.
++                                            ///  In this case, clients should set numSlices to 6;
++                                            ///  this can also be turned off by createFlags when
++                                            ///  calling AddrCreate
++    UINT_32             slice;              ///< Slice index
++    UINT_32             mipLevel;           ///< Current mipmap level.
++                                            ///  Padding/tiling have different rules for level0 and
++                                            ///  sublevels
++    ADDR_SURFACE_FLAGS  flags;              ///< Surface type flags
++    UINT_32             numFrags;           ///< Number of fragments, leave it zero or the same as
++                                            ///  number of samples for normal AA; Set it to the
++                                            ///  number of fragments for EQAA
++    /// r800 and later HWL parameters
++    // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's
++    ADDR_TILEINFO*      pTileInfo;          ///< 2D tile parameters. Set to 0 to default/calculate
++    AddrTileType        tileType;           ///< Micro tiling type, not needed when tileIndex != -1
++    INT_32              tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
++                                            ///  while the global useTileIndex is set to 1
++    UINT_32             basePitch;          ///< Base level pitch in pixels, 0 means ignored, is a
++                                            ///  must for mip levels from SI+.
++                                            ///  Don't use pitch in blocks for compressed formats!
++} ADDR_COMPUTE_SURFACE_INFO_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SURFACE_INFO_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeSurfaceInfo
++*   @note
++*       Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch
++*       Pixel: Original pixel
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    UINT_32         pitch;          ///< Pitch in elements (in blocks for compressed formats)
++    UINT_32         height;         ///< Height in elements (in blocks for compressed formats)
++    UINT_32         depth;          ///< Number of slice/depth
++    UINT_64         surfSize;       ///< Surface size in bytes
++    AddrTileMode    tileMode;       ///< Actual tile mode. May differ from that in input
++    UINT_32         baseAlign;      ///< Base address alignment
++    UINT_32         pitchAlign;     ///< Pitch alignment, in elements
++    UINT_32         heightAlign;    ///< Height alignment, in elements
++    UINT_32         depthAlign;     ///< Depth alignment, aligned to thickness, for 3d texture
++    UINT_32         bpp;            ///< Bits per element (e.g. blocks for BCn, 1/3 for 96bit)
++    UINT_32         pixelPitch;     ///< Pitch in original pixels
++    UINT_32         pixelHeight;    ///< Height in original pixels
++    UINT_32         pixelBits;      ///< Original bits per pixel, passed from input
++    UINT_64         sliceSize;      ///< Size of slice specified by input's slice
++                                    ///  The result is controlled by surface flags & createFlags
++                                    ///  By default this value equals to surfSize for volume
++    UINT_32         pitchTileMax;   ///< PITCH_TILE_MAX value for h/w register
++    UINT_32         heightTileMax;  ///< HEIGHT_TILE_MAX value for h/w register
++    UINT_32         sliceTileMax;   ///< SLICE_TILE_MAX value for h/w register
++
++    UINT_32         numSamples;     ///< Pass the effective numSamples processed in this call
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*  pTileInfo;      ///< Tile parameters used. Filled in if 0 on input
++    AddrTileType    tileType;       ///< Micro tiling type, only valid when tileIndex != -1
++    INT_32          tileIndex;      ///< Tile index, MAY be "downgraded"
++
++    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
++    /// Special information to work around SI mipmap swizzle bug UBTS #317508
++    BOOL_32         last2DLevel;    ///< TRUE if this is the last 2D(3D) tiled level
++                                    ///< Only meaningful when create flag checkLast2DLevel is set
++    /// Stereo info
++    ADDR_QBSTEREOINFO*  pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE
++} ADDR_COMPUTE_SURFACE_INFO_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeSurfaceInfo
++*
++*   @brief
++*       Compute surface width/height/depth/alignments and suitable tiling mode into pOut
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
++    ADDR_HANDLE                             hLib,
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeSurfaceAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    UINT_32         x;                  ///< X coordinate
++    UINT_32         y;                  ///< Y coordinate
++    UINT_32         slice;              ///< Slice index
++    UINT_32         sample;             ///< Sample index, use fragment index for EQAA
++
++    UINT_32         bpp;                ///< Bits per pixel
++    UINT_32         pitch;              ///< Surface pitch, in pixels
++    UINT_32         height;             ///< Surface height, in pixels
++    UINT_32         numSlices;          ///< Surface depth
++    UINT_32         numSamples;         ///< Number of samples
++
++    AddrTileMode    tileMode;           ///< Tile mode
++    BOOL_32         isDepth;            ///< TRUE if the surface uses depth sample ordering within
++                                        ///  micro tile. Textures can also choose depth sample order
++    UINT_32         tileBase;           ///< Base offset (in bits) inside micro tile which handles
++                                        ///  the case that components are stored separately
++    UINT_32         compBits;           ///< The component bits actually needed (for planar surface)
++
++    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
++                                        ///  number of samples for normal AA; Set it to the
++                                        ///  number of fragments for EQAA
++    /// r800 and later HWL parameters
++    // Used for 1D tiling above
++    AddrTileType    tileType;           ///< See definition of AddrTileType
++    struct
++    {
++        UINT_32     ignoreSE : 1;       ///< TRUE if shader engines are ignored. This is a texture
++                                        ///  only flag. Only non-RT texture can set this to TRUE
++        UINT_32     reserved :31;       ///< Reserved for future use.
++    };
++    // 2D tiling needs following structure
++    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
++    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    union
++    {
++        struct
++        {
++            UINT_32  bankSwizzle;       ///< Bank swizzle
++            UINT_32  pipeSwizzle;       ///< Pipe swizzle
++        };
++        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
++    };
++
++#if ADDR_AM_BUILD // These two fields are not valid in SW blt since no HTILE access
++    UINT_32         addr5Swizzle;       ///< ADDR5_SWIZZLE_MASK of DB_DEPTH_INFO
++    BOOL_32         is32ByteTile;       ///< Caller must have access to HTILE buffer and know if
++                                        ///  this tile is compressed to 32B
++#endif
++} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeSurfaceAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_64 addr;           ///< Byte address
++    UINT_32 bitPosition;    ///< Bit position within addr, 0-7.
++                            ///  For surface bpp < 8, e.g. FMT_1.
++    UINT_32 prtBlockIndex;  ///< Index of a PRT tile (64K block)
++} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeSurfaceAddrFromCoord
++*
++*   @brief
++*       Compute surface address from a given coordinate; the result is written to pOut.
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
++    ADDR_HANDLE                                     hLib,
++    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
++    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeSurfaceCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    UINT_64         addr;               ///< Address in bytes
++    UINT_32         bitPosition;        ///< Bit position in addr. 0-7. for surface bpp < 8,
++                                        ///  e.g. FMT_1;
++    UINT_32         bpp;                ///< Bits per pixel
++    UINT_32         pitch;              ///< Pitch, in pixels
++    UINT_32         height;             ///< Height in pixels
++    UINT_32         numSlices;          ///< Surface depth
++    UINT_32         numSamples;         ///< Number of samples
++
++    AddrTileMode    tileMode;           ///< Tile mode
++    BOOL_32         isDepth;            ///< Surface uses depth sample ordering within micro tile.
++                                        ///  Note: Textures can choose depth sample order as well.
++    UINT_32         tileBase;           ///< Base offset (in bits) inside micro tile which handles
++                                        ///  the case that components are stored separately
++    UINT_32         compBits;           ///< The component bits actually needed (for planar surface)
++
++    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
++                                        ///  number of samples for normal AA; Set it to the
++                                        ///  number of fragments for EQAA
++    /// r800 and later HWL parameters
++    // Used for 1D tiling above
++    AddrTileType    tileType;           ///< See definition of AddrTileType
++    struct
++    {
++        UINT_32     ignoreSE : 1;       ///< TRUE if shader engines are ignored. This is a texture
++                                        ///  only flag. Only non-RT texture can set this to TRUE
++        UINT_32     reserved :31;       ///< Reserved for future use.
++    };
++    // 2D tiling needs following structure
++    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
++    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    union
++    {
++        struct
++        {
++            UINT_32  bankSwizzle;       ///< Bank swizzle
++            UINT_32  pipeSwizzle;       ///< Pipe swizzle
++        };
++        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
++    };
++} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeSurfaceCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
++{
++    UINT_32 size;   ///< Size of this structure in bytes
++
++    UINT_32 x;      ///< X coordinate
++    UINT_32 y;      ///< Y coordinate
++    UINT_32 slice;  ///< Slice index
++    UINT_32 sample; ///< Sample index; this is the fragment index for EQAA
++} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeSurfaceCoordFromAddr
++*
++*   @brief
++*       Compute coordinate from a given surface address; the result is written to pOut.
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
++    ADDR_HANDLE                                     hLib,
++    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
++    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut);
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                   HTile functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   ADDR_HTILE_FLAGS
++*
++*   @brief
++*       HTILE flags
++***************************************************************************************************
++*/
++typedef union _ADDR_HTILE_FLAGS
++{
++    struct
++    {
++        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
++        UINT_32 reserved      :31; ///< Reserved bits
++    };
++
++    UINT_32 value;                 ///< All flag bits as one 32-bit value
++} ADDR_HTILE_FLAGS;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_HTILE_INFO_INPUT
++*
++*   @brief
++*       Input structure of AddrComputeHtileInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT
++{
++    UINT_32            size;            ///< Size of this structure in bytes
++
++    ADDR_HTILE_FLAGS   flags;           ///< HTILE flags
++    UINT_32            pitch;           ///< Surface pitch, in pixels
++    UINT_32            height;          ///< Surface height, in pixels
++    UINT_32            numSlices;       ///< Number of slices
++    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
++    AddrHtileBlockSize blockWidth;      ///< 4 or 8. EG and above only support 8
++    AddrHtileBlockSize blockHeight;     ///< 4 or 8. EG and above only support 8
++    ADDR_TILEINFO*     pTileInfo;       ///< Tile info
++
++    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
++                                        ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_HTILE_INFO_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_HTILE_INFO_OUTPUT
++*
++*   @brief
++*       Output structure of AddrComputeHtileInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_32 pitch;          ///< Pitch in pixels of depth buffer represented in this
++                            ///  HTile buffer. This might be larger than original depth
++                            ///  buffer pitch when called with an unaligned pitch.
++    UINT_32 height;         ///< Height in pixels, as above
++    UINT_64 htileBytes;     ///< Size of HTILE buffer, in bytes
++    UINT_32 baseAlign;      ///< Base alignment
++    UINT_32 bpp;            ///< Bits per pixel for HTILE is how many bits for an 8x8 block!
++    UINT_32 macroWidth;     ///< Macro width in pixels, actually squared cache shape
++    UINT_32 macroHeight;    ///< Macro height in pixels
++    UINT_64 sliceSize;      ///< Slice size, in bytes.
++} ADDR_COMPUTE_HTILE_INFO_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeHtileInfo
++*
++*   @brief
++*       Compute Htile pitch, height, base alignment and size in bytes
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
++    ADDR_HANDLE                             hLib,   ///< Address library handle
++    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut);  ///< [out] Output structure
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeHtileAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
++{
++    UINT_32            size;            ///< Size of this structure in bytes
++
++    UINT_32            pitch;           ///< Pitch, in pixels
++    UINT_32            height;          ///< Height in pixels
++    UINT_32            x;               ///< X coordinate
++    UINT_32            y;               ///< Y coordinate
++    UINT_32            slice;           ///< Index of slice
++    UINT_32            numSlices;       ///< Number of slices
++    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
++    AddrHtileBlockSize blockWidth;      ///< 4 or 8. 1 means 8, 0 means 4. EG and above: 8 only
++    AddrHtileBlockSize blockHeight;     ///< 4 or 8. 1 means 8, 0 means 4. EG and above: 8 only
++    ADDR_TILEINFO*     pTileInfo;       ///< Tile info
++
++    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
++                                        ///  README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeHtileAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_64 addr;           ///< Address in bytes
++    UINT_32 bitPosition;    ///< Bit position, 0 or 4. CMASK and HTILE share some lib methods,
++                            ///  so we keep bitPosition for HTILE as well
++} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeHtileAddrFromCoord
++*
++*   @brief
++*       Compute Htile address according to coordinates (of depth buffer)
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
++    ADDR_HANDLE                                     hLib,   ///< Address library handle
++    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut);  ///< [out] Output structure
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeHtileCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
++{
++    UINT_32            size;            ///< Size of this structure in bytes
++
++    UINT_64            addr;            ///< Address
++    UINT_32            bitPosition;     ///< Bit position 0 or 4. CMASK and HTILE share some methods
++                                        ///  so we keep bitPosition for HTILE as well
++    UINT_32            pitch;           ///< Pitch, in pixels
++    UINT_32            height;          ///< Height, in pixels
++    UINT_32            numSlices;       ///< Number of slices
++    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
++    AddrHtileBlockSize blockWidth;      ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8
++    AddrHtileBlockSize blockHeight;     ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8
++    ADDR_TILEINFO*     pTileInfo;       ///< Tile info
++
++    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
++                                        ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeHtileCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
++{
++    UINT_32 size;   ///< Size of this structure in bytes
++
++    UINT_32 x;      ///< X coordinate
++    UINT_32 y;      ///< Y coordinate
++    UINT_32 slice;  ///< Slice index
++} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeHtileCoordFromAddr
++*
++*   @brief
++*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
++*       Htile address
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
++    ADDR_HANDLE                                     hLib,   ///< Address library handle
++    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut);  ///< [out] Output structure
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                     C-mask functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   ADDR_CMASK_FLAGS
++*
++*   @brief
++*       CMASK flags
++***************************************************************************************************
++*/
++typedef union _ADDR_CMASK_FLAGS
++{
++    struct
++    {
++        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
++        UINT_32 reserved      :31; ///< Reserved bits
++    };
++
++    UINT_32 value;
++} ADDR_CMASK_FLAGS;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_CMASK_INFO_INPUT
++*
++*   @brief
++*       Input structure of AddrComputeCmaskInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT
++{
++    UINT_32             size;            ///< Size of this structure in bytes
++
++    ADDR_CMASK_FLAGS    flags;           ///< CMASK flags
++    UINT_32             pitch;           ///< Pitch, in pixels, of color buffer
++    UINT_32             height;          ///< Height, in pixels, of color buffer
++    UINT_32             numSlices;       ///< Number of slices, of color buffer
++    BOOL_32             isLinear;        ///< Linear or tiled layout, Only SI can be linear
++    ADDR_TILEINFO*      pTileInfo;       ///< Tile info
++
++    INT_32              tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
++                                         ///  while the global useTileIndex is set to 1
++    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
++                                         ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_CMASK_INFO_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_CMASK_INFO_OUTPUT
++*
++*   @brief
++*       Output structure of AddrComputeCmaskInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_32 pitch;          ///< Pitch in pixels of color buffer which
++                            ///  this Cmask matches. The size might be larger than
++                            ///  original color buffer pitch when called with
++                            ///  an unaligned pitch.
++    UINT_32 height;         ///< Height in pixels, as above
++    UINT_64 cmaskBytes;     ///< Size in bytes of CMask buffer
++    UINT_32 baseAlign;      ///< Base alignment
++    UINT_32 blockMax;       ///< Cmask block size. Need this to set CB_COLORn_MASK register
++    UINT_32 macroWidth;     ///< Macro width in pixels, actually squared cache shape
++    UINT_32 macroHeight;    ///< Macro height in pixels
++    UINT_64 sliceSize;      ///< Slice size, in bytes.
++} ADDR_COMPUTE_CMASK_INFO_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeCmaskInfo
++*
++*   @brief
++*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
++*       info
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
++    ADDR_HANDLE                             hLib,   ///< Address library handle
++    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut);  ///< [out] Output structure
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeCmaskAddrFromCoord
++*
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
++{
++    UINT_32          size;           ///< Size of this structure in bytes
++    UINT_32          x;              ///< X coordinate
++    UINT_32          y;              ///< Y coordinate
++    UINT_64          fmaskAddr;      ///< Fmask addr for tc compatible Cmask
++    UINT_32          slice;          ///< Slice index
++    UINT_32          pitch;          ///< Pitch in pixels, of color buffer
++    UINT_32          height;         ///< Height in pixels, of color buffer
++    UINT_32          numSlices;      ///< Number of slices
++    UINT_32          bpp;            ///< Bits per pixel (TODO: confirm of which surface)
++    BOOL_32          isLinear;       ///< Linear or tiled layout, Only SI can be linear
++    ADDR_CMASK_FLAGS flags;          ///< CMASK flags
++    ADDR_TILEINFO*   pTileInfo;      ///< Tile info
++
++    INT_32           tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
++                                     ///  while the global useTileIndex is set to 1
++    INT_32           macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
++                                     ///  README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeCmaskAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_64 addr;           ///< CMASK address in bytes
++    UINT_32 bitPosition;    ///< Bit position within addr, 0-7. CMASK is 4 bpp,
++                            ///  so the address may be located in bit 0 (0) or 4 (4)
++} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeCmaskAddrFromCoord
++*
++*   @brief
++*       Compute Cmask address according to coordinates (of MSAA color buffer)
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
++    ADDR_HANDLE                                     hLib,   ///< Address library handle
++    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut);  ///< [out] Output structure
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeCmaskCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
++{
++    UINT_32        size;            ///< Size of this structure in bytes
++
++    UINT_64        addr;            ///< CMASK address in bytes
++    UINT_32        bitPosition;     ///< Bit position within addr, 0-7. CMASK is 4 bpp,
++                                    ///  so the address may be located in bit 0 (0) or 4 (4)
++    UINT_32        pitch;           ///< Pitch, in pixels
++    UINT_32        height;          ///< Height in pixels
++    UINT_32        numSlices;       ///< Number of slices
++    BOOL_32        isLinear;        ///< Linear or tiled layout, Only SI can be linear
++    ADDR_TILEINFO* pTileInfo;       ///< Tile info
++
++    INT_32         tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
++                                    ///  while the global useTileIndex is set to 1
++    INT_32         macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
++                                    ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeCmaskCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
++{
++    UINT_32 size;   ///< Size of this structure in bytes
++
++    UINT_32 x;      ///< X coordinate
++    UINT_32 y;      ///< Y coordinate
++    UINT_32 slice;  ///< Slice index
++} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeCmaskCoordFromAddr
++*
++*   @brief
++*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
++*       Cmask address
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
++    ADDR_HANDLE                                     hLib,   ///< Address library handle
++    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut);  ///< [out] Output structure
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                     F-mask functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_FMASK_INFO_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeFmaskInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    AddrTileMode    tileMode;           ///< Tile mode
++    UINT_32         pitch;              ///< Surface pitch, in pixels
++    UINT_32         height;             ///< Surface height, in pixels
++    UINT_32         numSlices;          ///< Number of slice/depth
++    UINT_32         numSamples;         ///< Number of samples
++    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
++                                        ///  number of samples for normal AA; Set it to the
++                                        ///  number of fragments for EQAA
++    /// r800 and later HWL parameters
++    struct
++    {
++        UINT_32 resolved:   1;          ///< TRUE if the surface is for resolved fmask, only used
++                                        ///  by H/W clients. S/W should always set it to FALSE.
++        UINT_32 reserved:  31;          ///< Reserved for future use.
++    };
++    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Clients must give valid data
++    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++} ADDR_COMPUTE_FMASK_INFO_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_FMASK_INFO_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeFmaskInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    UINT_32         pitch;          ///< Pitch of fmask in pixels
++    UINT_32         height;         ///< Height of fmask in pixels
++    UINT_32         numSlices;      ///< Slices of fmask
++    UINT_64         fmaskBytes;     ///< Size of fmask in bytes
++    UINT_32         baseAlign;      ///< Base address alignment
++    UINT_32         pitchAlign;     ///< Pitch alignment
++    UINT_32         heightAlign;    ///< Height alignment
++    UINT_32         bpp;            ///< Bits per pixel of FMASK is: number of bit planes
++    UINT_32         numSamples;     ///< Number of samples, used for dump, export this since input
++                                    ///  may be changed in 9xx and above
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*  pTileInfo;      ///< Tile parameters used. Fmask can have different
++                                    ///  bank_height from color buffer
++    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
++                                    ///  while the global useTileIndex is set to 1
++    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
++    UINT_64         sliceSize;      ///< Size of slice in bytes
++} ADDR_COMPUTE_FMASK_INFO_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeFmaskInfo
++*
++*   @brief
++*       Compute Fmask pitch/height/depth/alignments and size in bytes
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
++    ADDR_HANDLE                             hLib,   ///< Address library handle
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut);  ///< [out] Output structure
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeFmaskAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    UINT_32         x;                  ///< X coordinate
++    UINT_32         y;                  ///< Y coordinate
++    UINT_32         slice;              ///< Slice index
++    UINT_32         plane;              ///< Plane number
++    UINT_32         sample;             ///< Sample index (fragment index for EQAA)
++
++    UINT_32         pitch;              ///< Surface pitch, in pixels
++    UINT_32         height;             ///< Surface height, in pixels
++    UINT_32         numSamples;         ///< Number of samples
++    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
++                                        ///  number of samples for normal AA; Set it to the
++                                        ///  number of fragments for EQAA
++
++    AddrTileMode    tileMode;           ///< Tile mode
++    union
++    {
++        struct
++        {
++            UINT_32  bankSwizzle;       ///< Bank swizzle
++            UINT_32  pipeSwizzle;       ///< Pipe swizzle
++        };
++        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
++    };
++
++    /// r800 and later HWL parameters
++    struct
++    {
++        UINT_32 resolved:   1;          ///< TRUE if this is a resolved fmask, used by H/W clients
++        UINT_32 ignoreSE:   1;          ///< TRUE if shader engines are ignored.
++        UINT_32 reserved:  30;          ///< Reserved for future use.
++    };
++    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Client must provide all data
++
++} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeFmaskAddrFromCoord
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_64 addr;           ///< Fmask address
++    UINT_32 bitPosition;    ///< Bit position within addr, 0-7.
++} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeFmaskAddrFromCoord
++*
++*   @brief
++*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
++    ADDR_HANDLE                                     hLib,   ///< Address library handle
++    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut);  ///< [out] Output structure
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
++*
++*   @brief
++*       Input structure for AddrComputeFmaskCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    UINT_64         addr;               ///< Address
++    UINT_32         bitPosition;        ///< Bit position within addr, 0-7.
++
++    UINT_32         pitch;              ///< Pitch, in pixels
++    UINT_32         height;             ///< Height in pixels
++    UINT_32         numSamples;         ///< Number of samples
++    UINT_32         numFrags;           ///< Number of fragments
++    AddrTileMode    tileMode;           ///< Tile mode
++    union
++    {
++        struct
++        {
++            UINT_32  bankSwizzle;       ///< Bank swizzle
++            UINT_32  pipeSwizzle;       ///< Pipe swizzle
++        };
++        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
++    };
++
++    /// r800 and later HWL parameters
++    struct
++    {
++        UINT_32 resolved:   1;          ///< TRUE if this is a resolved fmask, used by HW components
++        UINT_32 ignoreSE:   1;          ///< TRUE if shader engines are ignored.
++        UINT_32 reserved:  30;          ///< Reserved for future use.
++    };
++    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
++
++} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputeFmaskCoordFromAddr
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
++{
++    UINT_32 size;       ///< Size of this structure in bytes
++
++    UINT_32 x;          ///< X coordinate
++    UINT_32 y;          ///< Y coordinate
++    UINT_32 slice;      ///< Slice index
++    UINT_32 plane;      ///< Plane number
++    UINT_32 sample;     ///< Sample index (fragment index for EQAA)
++} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeFmaskCoordFromAddr
++*
++*   @brief
++*       Compute FMASK coordinates from a given address
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
++    ADDR_HANDLE                                     hLib,   ///< Address library handle
++    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] Input structure (read-only)
++    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut);  ///< [out] Output structure
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                          Element/utility functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrGetVersion
++*
++*   @brief
++*       Get AddrLib version number
++***************************************************************************************************
++*/
++UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib);
++
++/**
++***************************************************************************************************
++*   AddrUseTileIndex
++*
++*   @brief
++*       Return TRUE if tileIndex is enabled in this address library
++***************************************************************************************************
++*/
++BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib);
++
++/**
++***************************************************************************************************
++*   AddrUseCombinedSwizzle
++*
++*   @brief
++*       Return TRUE if combined swizzle is enabled in this address library
++***************************************************************************************************
++*/
++BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib);
++
++/**
++***************************************************************************************************
++*   ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
++*
++*   @brief
++*       Input structure of AddrExtractBankPipeSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    UINT_32         base256b;       ///< Base256b value
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*  pTileInfo;      ///< 2D tile parameters. Client must provide all data
++
++    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
++                                    ///  while the global useTileIndex is set to 1
++    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
++                                    ///< README: When tileIndex is not -1, this must be valid
++} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
++*
++*   @brief
++*       Output structure of AddrExtractBankPipeSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_32 bankSwizzle;    ///< Bank swizzle
++    UINT_32 pipeSwizzle;    ///< Pipe swizzle
++} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrExtractBankPipeSwizzle
++*
++*   @brief
++*       Extract Bank and Pipe swizzle from base256b
++*   @return
++*       ADDR_OK if no error
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
++    ADDR_HANDLE                                 hLib,   ///< Address library handle
++    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] Input structure (read-only)
++    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut);  ///< [out] Output structure
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
++*
++*   @brief
++*       Input structure of AddrCombineBankPipeSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    UINT_32         bankSwizzle;    ///< Bank swizzle
++    UINT_32         pipeSwizzle;    ///< Pipe swizzle
++    UINT_64         baseAddr;       ///< Base address (leave it zero for driver clients)
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*  pTileInfo;      ///< 2D tile parameters. Client must provide all data
++
++    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
++                                    ///  while the global useTileIndex is set to 1
++    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
++                                    ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
++*
++*   @brief
++*       Output structure of AddrCombineBankPipeSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_32 tileSwizzle;    ///< Combined swizzle
++} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrCombineBankPipeSwizzle
++*
++*   @brief
++*       Combine Bank and Pipe swizzle
++*   @return
++*       ADDR_OK if no error
++*   @note
++*       baseAddr here is full MCAddress instead of base256b
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
++    ADDR_HANDLE                                 hLib,
++    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
++    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SLICESWIZZLE_INPUT
++*
++*   @brief
++*       Input structure of AddrComputeSliceSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    AddrTileMode    tileMode;           ///< Tile Mode
++    UINT_32         baseSwizzle;        ///< Base tile swizzle
++    UINT_32         slice;              ///< Slice index
++    UINT_64         baseAddr;           ///< Base address, driver should leave it 0 in most cases
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Actually banks needed here!
++
++    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
++                                        ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_SLICESWIZZLE_INPUT;
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
++*
++*   @brief
++*       Output structure of AddrComputeSliceSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
++{
++    UINT_32  size;           ///< Size of this structure in bytes
++
++    UINT_32  tileSwizzle;    ///< Recalculated tileSwizzle value
++} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeSliceSwizzle
++*
++*   @brief
++*       Compute the tile swizzle of a slice from the base tile swizzle
++*   @return
++*       ADDR_OK if no error
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
++    ADDR_HANDLE                             hLib,
++    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,
++    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut);
++
++
++/**
++***************************************************************************************************
++*   AddrSwizzleGenOption
++*
++*   @brief
++*       Which swizzle generating options: legacy or linear
++***************************************************************************************************
++*/
++typedef enum _AddrSwizzleGenOption
++{
++    ADDR_SWIZZLE_GEN_DEFAULT    = 0,    ///< As is in client driver implementation for swizzle
++    ADDR_SWIZZLE_GEN_LINEAR     = 1,    ///< Using a linear increment of swizzle
++} AddrSwizzleGenOption;
++
++/**
++***************************************************************************************************
++*   ADDR_SWIZZLE_OPTION
++*
++*   @brief
++*       Controls how swizzle is generated
++***************************************************************************************************
++*/
++typedef union _ADDR_SWIZZLE_OPTION
++{
++    struct
++    {
++        UINT_32 genOption       : 1;    ///< The way swizzle is generated, see AddrSwizzleGenOption
++        UINT_32 reduceBankBit   : 1;    ///< TRUE if we need to reduce swizzle bits
++        UINT_32 reserved        :30;    ///< Reserved bits
++    };
++
++    UINT_32 value;                      ///< Same bits as the struct above, as a single UINT_32
++
++} ADDR_SWIZZLE_OPTION;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_BASE_SWIZZLE_INPUT
++*
++*   @brief
++*       Input structure of AddrComputeBaseSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT
++{
++    UINT_32             size;           ///< Size of this structure in bytes
++
++    ADDR_SWIZZLE_OPTION option;         ///< Swizzle option
++    UINT_32             surfIndex;      ///< Index of this surface type
++    AddrTileMode        tileMode;       ///< Tile Mode
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*      pTileInfo;      ///< 2D tile parameters. Actually banks needed here!
++
++    INT_32              tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    INT_32              macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
++                                        ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_BASE_SWIZZLE_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
++*
++*   @brief
++*       Output structure of AddrComputeBaseSwizzle
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_32 tileSwizzle;    ///< Combined swizzle
++} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeBaseSwizzle
++*
++*   @brief
++*       Return a combined bank and pipe swizzle based on surface type/index
++*   @return
++*       ADDR_OK if no error
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
++    ADDR_HANDLE                             hLib,
++    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
++    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ELEM_GETEXPORTNORM_INPUT
++*
++*   @brief
++*       Input structure for ElemGetExportNorm
++*
++***************************************************************************************************
++*/
++typedef struct _ELEM_GETEXPORTNORM_INPUT
++{
++    UINT_32             size;       ///< Size of this structure in bytes
++
++    AddrColorFormat     format;     ///< Color buffer format; Client should use ColorFormat
++    AddrSurfaceNumber   num;        ///< Surface number type; Client should use NumberType
++    AddrSurfaceSwap     swap;       ///< Surface swap byte swap; Client should use SurfaceSwap
++    UINT_32             numSamples; ///< Number of samples
++} ELEM_GETEXPORTNORM_INPUT;
++
++/**
++***************************************************************************************************
++*  ElemGetExportNorm
++*
++*   @brief
++*       Helper function to check whether a format can be EXPORT_NORM, which is a register
++*       CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600
++*       family
++*   @note
++*       The implementation is only for r600.
++*       00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two
++*       clocks per export)
++*       01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one
++*       clock per export)
++*
++***************************************************************************************************
++*/
++BOOL_32 ADDR_API ElemGetExportNorm(
++    ADDR_HANDLE                     hLib,
++    const ELEM_GETEXPORTNORM_INPUT* pIn);
++
++
++
++/**
++***************************************************************************************************
++*   ELEM_FLT32TODEPTHPIXEL_INPUT
++*
++*   @brief
++*       Input structure for ElemFlt32ToDepthPixel
++*
++***************************************************************************************************
++*/
++typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    AddrDepthFormat format;         ///< Depth buffer format
++    ADDR_FLT_32     comps[2];       ///< Component values (Z/stencil)
++} ELEM_FLT32TODEPTHPIXEL_INPUT;
++
++/**
++***************************************************************************************************
++*   ELEM_FLT32TODEPTHPIXEL_OUTPUT
++*
++*   @brief
++*       Output structure for ElemFlt32ToDepthPixel
++*
++***************************************************************************************************
++*/
++typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT
++{
++    UINT_32 size;           ///< Size of this structure in bytes
++
++    UINT_8* pPixel;         ///< Real depth value. Same data type as depth buffer.
++                            ///  Client must provide enough storage for this type.
++    UINT_32 depthBase;      ///< Tile base in bits for depth bits
++    UINT_32 stencilBase;    ///< Tile base in bits for stencil bits
++    UINT_32 depthBits;      ///< Bits for depth
++    UINT_32 stencilBits;    ///< Bits for stencil
++} ELEM_FLT32TODEPTHPIXEL_OUTPUT;
++
++/**
++***************************************************************************************************
++*   ElemFlt32ToDepthPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a depth/stencil pixel value
++*
++*   @return
++*       Return code
++*
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
++    ADDR_HANDLE                         hLib,
++    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
++    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ELEM_FLT32TOCOLORPIXEL_INPUT
++*
++*   @brief
++*       Input structure for ElemFlt32ToColorPixel
++*
++***************************************************************************************************
++*/
++typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT
++{
++    UINT_32            size;           ///< Size of this structure in bytes
++
++    AddrColorFormat    format;         ///< Color buffer format
++    AddrSurfaceNumber  surfNum;        ///< Surface number
++    AddrSurfaceSwap    surfSwap;       ///< Surface swap
++    ADDR_FLT_32        comps[4];       ///< Component values (r/g/b/a)
++} ELEM_FLT32TOCOLORPIXEL_INPUT;
++
++/**
++***************************************************************************************************
++*   ELEM_FLT32TOCOLORPIXEL_OUTPUT
++*
++*   @brief
++*       Output structure for ElemFlt32ToColorPixel
++*
++***************************************************************************************************
++*/
++typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT
++{
++    UINT_32 size;       ///< Size of this structure in bytes
++
++    UINT_8* pPixel;     ///< Real color value. Same data type as color buffer.
++                        ///  Client must provide enough storage for this type.
++} ELEM_FLT32TOCOLORPIXEL_OUTPUT;
++
++/**
++***************************************************************************************************
++*   ElemFlt32ToColorPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
++*
++*   @return
++*       Return code
++*
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
++    ADDR_HANDLE                         hLib,
++    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
++    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut);
++
++
++/**
++***************************************************************************************************
++*   ADDR_CONVERT_TILEINFOTOHW_INPUT
++*
++*   @brief
++*       Input structure for AddrConvertTileInfoToHW
++*   @note
++*       When reverse is TRUE, indices are ignored
++***************************************************************************************************
++*/
++typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++    BOOL_32         reverse;            ///< Convert control flag.
++                                        ///  FALSE: convert from real value to HW value;
++                                        ///  TRUE: convert from HW value to real value.
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*  pTileInfo;          ///< Tile parameters with real value
++
++    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
++                                        ///  while the global useTileIndex is set to 1
++    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
++                                        ///< README: When tileIndex is not -1, this must be valid
++} ADDR_CONVERT_TILEINFOTOHW_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_CONVERT_TILEINFOTOHW_OUTPUT
++*
++*   @brief
++*       Output structure for AddrConvertTileInfoToHW
++***************************************************************************************************
++*/
++typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT
++{
++    UINT_32             size;               ///< Size of this structure in bytes
++
++    /// r800 and later HWL parameters
++    ADDR_TILEINFO*      pTileInfo;          ///< Tile parameters with hardware register value
++
++} ADDR_CONVERT_TILEINFOTOHW_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrConvertTileInfoToHW
++*
++*   @brief
++*       Convert tile info from real value to hardware register value
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
++    ADDR_HANDLE                             hLib,
++    const ADDR_CONVERT_TILEINFOTOHW_INPUT*  pIn,
++    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*       pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_CONVERT_TILEINDEX_INPUT
++*
++*   @brief
++*       Input structure for AddrConvertTileIndex
++***************************************************************************************************
++*/
++typedef struct _ADDR_CONVERT_TILEINDEX_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    INT_32          tileIndex;          ///< Tile index
++    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
++    BOOL_32         tileInfoHw;         ///< Set to TRUE if client wants HW enum, otherwise actual
++} ADDR_CONVERT_TILEINDEX_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_CONVERT_TILEINDEX_OUTPUT
++*
++*   @brief
++*       Output structure for AddrConvertTileIndex
++***************************************************************************************************
++*/
++typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT
++{
++    UINT_32             size;           ///< Size of this structure in bytes
++
++    AddrTileMode        tileMode;       ///< Tile mode
++    AddrTileType        tileType;       ///< Tile type
++    ADDR_TILEINFO*      pTileInfo;      ///< Tile info
++
++} ADDR_CONVERT_TILEINDEX_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrConvertTileIndex
++*
++*   @brief
++*       Convert tile index to tile mode/type/info
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
++    ADDR_HANDLE                         hLib,
++    const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
++    ADDR_CONVERT_TILEINDEX_OUTPUT*      pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_CONVERT_TILEINDEX1_INPUT
++*
++*   @brief
++*       Input structure for AddrConvertTileIndex1 (without macro mode index)
++***************************************************************************************************
++*/
++typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT
++{
++    UINT_32         size;               ///< Size of this structure in bytes
++
++    INT_32          tileIndex;          ///< Tile index
++    UINT_32         bpp;                ///< Bits per pixel
++    UINT_32         numSamples;         ///< Number of samples
++    BOOL_32         tileInfoHw;         ///< Set to TRUE if client wants HW enum, otherwise actual
++} ADDR_CONVERT_TILEINDEX1_INPUT;
++
++/**
++***************************************************************************************************
++*   AddrConvertTileIndex1
++*
++*   @brief
++*       Convert tile index to tile mode/type/info
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
++    ADDR_HANDLE                             hLib,
++    const ADDR_CONVERT_TILEINDEX1_INPUT*    pIn,
++    ADDR_CONVERT_TILEINDEX_OUTPUT*          pOut);
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_GET_TILEINDEX_INPUT
++*
++*   @brief
++*       Input structure for AddrGetTileIndex
++***************************************************************************************************
++*/
++typedef struct _ADDR_GET_TILEINDEX_INPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    AddrTileMode    tileMode;       ///< Tile mode
++    AddrTileType    tileType;       ///< Tile-type: disp/non-disp/...
++    ADDR_TILEINFO*  pTileInfo;      ///< Pointer to tile-info structure, can be NULL for linear/1D
++} ADDR_GET_TILEINDEX_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_GET_TILEINDEX_OUTPUT
++*
++*   @brief
++*       Output structure for AddrGetTileIndex
++***************************************************************************************************
++*/
++typedef struct _ADDR_GET_TILEINDEX_OUTPUT
++{
++    UINT_32         size;           ///< Size of this structure in bytes
++
++    INT_32          index;          ///< index in table
++} ADDR_GET_TILEINDEX_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrGetTileIndex
++*
++*   @brief
++*       Get the tiling mode index in table
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
++    ADDR_HANDLE                     hLib,
++    const ADDR_GET_TILEINDEX_INPUT* pIn,
++    ADDR_GET_TILEINDEX_OUTPUT*      pOut);
++
++
++
++
++/**
++***************************************************************************************************
++*   ADDR_PRT_INFO_INPUT
++*
++*   @brief
++*       Input structure for AddrComputePrtInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_PRT_INFO_INPUT
++{
++    AddrFormat          format;        ///< Surface format
++    UINT_32             baseMipWidth;  ///< Base mipmap width
++    UINT_32             baseMipHeight; ///< Base mipmap height
++    UINT_32             baseMipDepth;  ///< Base mipmap depth
++    UINT_32             numFrags;      ///< Number of fragments
++} ADDR_PRT_INFO_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_PRT_INFO_OUTPUT
++*
++*   @brief
++*       Output structure for AddrComputePrtInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_PRT_INFO_OUTPUT
++{
++    UINT_32             prtTileWidth;
++    UINT_32             prtTileHeight;
++} ADDR_PRT_INFO_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputePrtInfo
++*
++*   @brief
++*       Compute prt surface related information
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
++    ADDR_HANDLE                 hLib,
++    const ADDR_PRT_INFO_INPUT*  pIn,
++    ADDR_PRT_INFO_OUTPUT*       pOut);
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                                     DCC key functions
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_DCCINFO_INPUT
++*
++*   @brief
++*       Input structure of AddrComputeDccInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_DCCINFO_INPUT
++{
++    UINT_32             size;            ///< Size of this structure in bytes
++    UINT_32             bpp;             ///< BitPP of color surface
++    UINT_32             numSamples;      ///< Sample number of color surface
++    UINT_64             colorSurfSize;   ///< Size of color surface to which dcc key is bound
++    AddrTileMode        tileMode;        ///< Tile mode of color surface
++    ADDR_TILEINFO       tileInfo;        ///< Tile info of color surface
++    UINT_32             tileSwizzle;     ///< Tile swizzle
++    INT_32              tileIndex;       ///< Tile index of color surface,
++                                         ///< MUST be -1 if you don't want to use it
++                                         ///< while the global useTileIndex is set to 1
++    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
++                                         ///< README: When tileIndex is not -1, this must be valid
++} ADDR_COMPUTE_DCCINFO_INPUT;
++
++/**
++***************************************************************************************************
++*   ADDR_COMPUTE_DCCINFO_OUTPUT
++*
++*   @brief
++*       Output structure of AddrComputeDccInfo
++***************************************************************************************************
++*/
++typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT
++{
++    UINT_32 size;                 ///< Size of this structure in bytes
++    UINT_64 dccRamBaseAlign;      ///< Base alignment of dcc key
++    UINT_64 dccRamSize;           ///< Size of dcc key
++    UINT_64 dccFastClearSize;     ///< Size of dcc key portion that can be fast cleared
++    BOOL_32 subLvlCompressible;   ///< Whether sub resource is compressible
++} ADDR_COMPUTE_DCCINFO_OUTPUT;
++
++/**
++***************************************************************************************************
++*   AddrComputeDccInfo
++*
++*   @brief
++*       Compute DCC key size, base alignment
++*       info
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
++    ADDR_HANDLE                             hLib,
++    const ADDR_COMPUTE_DCCINFO_INPUT*       pIn,
++    ADDR_COMPUTE_DCCINFO_OUTPUT*            pOut);
++
++#if defined(__cplusplus)
++}
++#endif
++
++#endif // __ADDR_INTERFACE_H__
++
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/addrtypes.h b/src/gallium/winsys/radeon/amdgpu/addrlib/addrtypes.h
+new file mode 100644
+index 0000000..4c68ac5
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/addrtypes.h
+@@ -0,0 +1,590 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrtypes.h
++* @brief Contains the helper function and constants
++***************************************************************************************************
++*/
++#ifndef __ADDR_TYPES_H__
++#define __ADDR_TYPES_H__
++
++#if defined(__APPLE__) || defined(TCORE_BUILD)
++// External definitions header maintained by Mac driver team (and TCORE team)
++// Helps address compilation issues & reduces code covered by NDA
++#include "addrExtDef.h"
++
++#else
++
++// Windows and/or Linux
++#if !defined(VOID)
++typedef void           VOID;
++#endif
++
++#if !defined(FLOAT)
++typedef float          FLOAT;
++#endif
++
++#if !defined(CHAR)
++typedef char           CHAR;
++#endif
++
++#if !defined(INT)
++typedef int            INT;
++#endif
++
++#include <stdarg.h> // va_list...etc need this header
++
++#endif // defined (__APPLE__)
++
++/**
++***************************************************************************************************
++*   Calling conventions
++***************************************************************************************************
++*/
++#ifndef ADDR_CDECL
++    #if defined(__GNUC__)
++        #define ADDR_CDECL __attribute__((cdecl))
++    #else
++        #define ADDR_CDECL __cdecl
++    #endif
++#endif
++
++#ifndef ADDR_STDCALL
++    #if defined(__GNUC__)
++        #if defined(__AMD64__)
++            #define ADDR_STDCALL
++        #else
++            #define ADDR_STDCALL __attribute__((stdcall))
++        #endif
++    #else
++        #define ADDR_STDCALL __stdcall
++    #endif
++#endif
++
++#ifndef ADDR_FASTCALL
++    #if defined(__GNUC__)
++        #define ADDR_FASTCALL __attribute__((regparm(0)))
++    #else
++        #define ADDR_FASTCALL __fastcall
++    #endif
++#endif
++
++#ifndef GC_CDECL
++    #define GC_CDECL  ADDR_CDECL
++#endif
++
++#ifndef GC_STDCALL
++    #define GC_STDCALL  ADDR_STDCALL
++#endif
++
++#ifndef GC_FASTCALL
++    #define GC_FASTCALL  ADDR_FASTCALL
++#endif
++
++
++#if defined(__GNUC__)
++    #define ADDR_INLINE static inline   // inline needs to be static to link
++#else
++    // win32, win64, other platforms
++    #define ADDR_INLINE   __inline
++#endif // #if defined(__GNUC__)
++
++#define ADDR_API ADDR_FASTCALL //default call convention is fast call
++
++/**
++***************************************************************************************************
++* Global defines used by other modules
++***************************************************************************************************
++*/
++#if !defined(TILEINDEX_INVALID)
++#define TILEINDEX_INVALID                -1
++#endif
++
++#if !defined(TILEINDEX_LINEAR_GENERAL)
++#define TILEINDEX_LINEAR_GENERAL         -2
++#endif
++
++#if !defined(TILEINDEX_LINEAR_ALIGNED)
++#define TILEINDEX_LINEAR_ALIGNED          8
++#endif
++
++/**
++***************************************************************************************************
++* Return codes
++***************************************************************************************************
++*/
++typedef enum _ADDR_E_RETURNCODE
++{
++    // General Return
++    ADDR_OK    = 0,
++    ADDR_ERROR = 1,
++
++    // Specific Errors
++    ADDR_OUTOFMEMORY,
++    ADDR_INVALIDPARAMS,
++    ADDR_NOTSUPPORTED,
++    ADDR_NOTIMPLEMENTED,
++    ADDR_PARAMSIZEMISMATCH,
++    ADDR_INVALIDGBREGVALUES,
++
++} ADDR_E_RETURNCODE;
++
++/**
++***************************************************************************************************
++* @brief
++*   Neutral enums that define tile modes for all H/W
++* @note
++*   R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from
++*   ADDR_TM_2D_TILED_XTHICK
++*
++***************************************************************************************************
++*/
++typedef enum _AddrTileMode
++{
++    ADDR_TM_LINEAR_GENERAL      = 0,    ///< Least restrictions, pitch: multiple of 8 if not buffer
++    ADDR_TM_LINEAR_ALIGNED      = 1,    ///< Requests pitch or slice to be multiple of 64 pixels
++    ADDR_TM_1D_TILED_THIN1      = 2,    ///< Linear array of 8x8 tiles
++    ADDR_TM_1D_TILED_THICK      = 3,    ///< Linear array of 8x8x4 tiles
++    ADDR_TM_2D_TILED_THIN1      = 4,    ///< A set of macro tiles consist of 8x8 tiles
++    ADDR_TM_2D_TILED_THIN2      = 5,    ///< 600 HWL only, macro tile ratio is 1:4
++    ADDR_TM_2D_TILED_THIN4      = 6,    ///< 600 HWL only, macro tile ratio is 1:16
++    ADDR_TM_2D_TILED_THICK      = 7,    ///< A set of macro tiles consist of 8x8x4 tiles
++    ADDR_TM_2B_TILED_THIN1      = 8,    ///< 600 HWL only, with bank swap
++    ADDR_TM_2B_TILED_THIN2      = 9,    ///< 600 HWL only, with bank swap and ratio is 1:4
++    ADDR_TM_2B_TILED_THIN4      = 10,   ///< 600 HWL only, with bank swap and ratio is 1:16
++    ADDR_TM_2B_TILED_THICK      = 11,   ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles
++    ADDR_TM_3D_TILED_THIN1      = 12,   ///< Macro tiling w/ pipe rotation between slices
++    ADDR_TM_3D_TILED_THICK      = 13,   ///< Macro tiling w/ pipe rotation between slices, thick
++    ADDR_TM_3B_TILED_THIN1      = 14,   ///< 600 HWL only, with bank swap
++    ADDR_TM_3B_TILED_THICK      = 15,   ///< 600 HWL only, with bank swap, thick
++    ADDR_TM_2D_TILED_XTHICK     = 16,   ///< Tile is 8x8x8, valid from NI
++    ADDR_TM_3D_TILED_XTHICK     = 17,   ///< Tile is 8x8x8, valid from NI
++    ADDR_TM_POWER_SAVE          = 18,   ///< Power save mode, only used by KMD on NI
++    ADDR_TM_PRT_TILED_THIN1     = 19,   ///< No bank/pipe rotation or hashing beyond macrotile size
++    ADDR_TM_PRT_2D_TILED_THIN1  = 20,   ///< Same as 2D_TILED_THIN1, PRT only
++    ADDR_TM_PRT_3D_TILED_THIN1  = 21,   ///< Same as 3D_TILED_THIN1, PRT only
++    ADDR_TM_PRT_TILED_THICK     = 22,   ///< No bank/pipe rotation or hashing beyond macrotile size
++    ADDR_TM_PRT_2D_TILED_THICK  = 23,   ///< Same as 2D_TILED_THICK, PRT only
++    ADDR_TM_PRT_3D_TILED_THICK  = 24,   ///< Same as 3D_TILED_THICK, PRT only
++    ADDR_TM_COUNT               = 25,   ///< Must be the value of the last tile mode
++} AddrTileMode;
++
++/**
++***************************************************************************************************
++*   AddrFormat
++*
++*   @brief
++*       Neutral enum for SurfaceFormat
++*
++***************************************************************************************************
++*/
++typedef enum _AddrFormat {
++    ADDR_FMT_INVALID                              = 0x00000000,
++    ADDR_FMT_8                                    = 0x00000001,
++    ADDR_FMT_4_4                                  = 0x00000002,
++    ADDR_FMT_3_3_2                                = 0x00000003,
++    ADDR_FMT_RESERVED_4                           = 0x00000004,
++    ADDR_FMT_16                                   = 0x00000005,
++    ADDR_FMT_16_FLOAT                             = 0x00000006,
++    ADDR_FMT_8_8                                  = 0x00000007,
++    ADDR_FMT_5_6_5                                = 0x00000008,
++    ADDR_FMT_6_5_5                                = 0x00000009,
++    ADDR_FMT_1_5_5_5                              = 0x0000000a,
++    ADDR_FMT_4_4_4_4                              = 0x0000000b,
++    ADDR_FMT_5_5_5_1                              = 0x0000000c,
++    ADDR_FMT_32                                   = 0x0000000d,
++    ADDR_FMT_32_FLOAT                             = 0x0000000e,
++    ADDR_FMT_16_16                                = 0x0000000f,
++    ADDR_FMT_16_16_FLOAT                          = 0x00000010,
++    ADDR_FMT_8_24                                 = 0x00000011,
++    ADDR_FMT_8_24_FLOAT                           = 0x00000012,
++    ADDR_FMT_24_8                                 = 0x00000013,
++    ADDR_FMT_24_8_FLOAT                           = 0x00000014,
++    ADDR_FMT_10_11_11                             = 0x00000015,
++    ADDR_FMT_10_11_11_FLOAT                       = 0x00000016,
++    ADDR_FMT_11_11_10                             = 0x00000017,
++    ADDR_FMT_11_11_10_FLOAT                       = 0x00000018,
++    ADDR_FMT_2_10_10_10                           = 0x00000019,
++    ADDR_FMT_8_8_8_8                              = 0x0000001a,
++    ADDR_FMT_10_10_10_2                           = 0x0000001b,
++    ADDR_FMT_X24_8_32_FLOAT                       = 0x0000001c,
++    ADDR_FMT_32_32                                = 0x0000001d,
++    ADDR_FMT_32_32_FLOAT                          = 0x0000001e,
++    ADDR_FMT_16_16_16_16                          = 0x0000001f,
++    ADDR_FMT_16_16_16_16_FLOAT                    = 0x00000020,
++    ADDR_FMT_RESERVED_33                          = 0x00000021,
++    ADDR_FMT_32_32_32_32                          = 0x00000022,
++    ADDR_FMT_32_32_32_32_FLOAT                    = 0x00000023,
++    ADDR_FMT_RESERVED_36                          = 0x00000024,
++    ADDR_FMT_1                                    = 0x00000025,
++    ADDR_FMT_1_REVERSED                           = 0x00000026,
++    ADDR_FMT_GB_GR                                = 0x00000027,
++    ADDR_FMT_BG_RG                                = 0x00000028,
++    ADDR_FMT_32_AS_8                              = 0x00000029,
++    ADDR_FMT_32_AS_8_8                            = 0x0000002a,
++    ADDR_FMT_5_9_9_9_SHAREDEXP                    = 0x0000002b,
++    ADDR_FMT_8_8_8                                = 0x0000002c,
++    ADDR_FMT_16_16_16                             = 0x0000002d,
++    ADDR_FMT_16_16_16_FLOAT                       = 0x0000002e,
++    ADDR_FMT_32_32_32                             = 0x0000002f,
++    ADDR_FMT_32_32_32_FLOAT                       = 0x00000030,
++    ADDR_FMT_BC1                                  = 0x00000031,
++    ADDR_FMT_BC2                                  = 0x00000032,
++    ADDR_FMT_BC3                                  = 0x00000033,
++    ADDR_FMT_BC4                                  = 0x00000034,
++    ADDR_FMT_BC5                                  = 0x00000035,
++    ADDR_FMT_BC6                                  = 0x00000036,
++    ADDR_FMT_BC7                                  = 0x00000037,
++    ADDR_FMT_32_AS_32_32_32_32                    = 0x00000038,
++    ADDR_FMT_APC3                                 = 0x00000039,
++    ADDR_FMT_APC4                                 = 0x0000003a,
++    ADDR_FMT_APC5                                 = 0x0000003b,
++    ADDR_FMT_APC6                                 = 0x0000003c,
++    ADDR_FMT_APC7                                 = 0x0000003d,
++    ADDR_FMT_CTX1                                 = 0x0000003e,
++    ADDR_FMT_RESERVED_63                          = 0x0000003f,
++} AddrFormat;
++
++/**
++***************************************************************************************************
++*   AddrDepthFormat
++*
++*   @brief
++*       Neutral enum for addrFlt32ToDepthPixel
++*
++***************************************************************************************************
++*/
++typedef enum _AddrDepthFormat
++{
++    ADDR_DEPTH_INVALID                            = 0x00000000,
++    ADDR_DEPTH_16                                 = 0x00000001,
++    ADDR_DEPTH_X8_24                              = 0x00000002,
++    ADDR_DEPTH_8_24                               = 0x00000003,
++    ADDR_DEPTH_X8_24_FLOAT                        = 0x00000004,
++    ADDR_DEPTH_8_24_FLOAT                         = 0x00000005,
++    ADDR_DEPTH_32_FLOAT                           = 0x00000006,
++    ADDR_DEPTH_X24_8_32_FLOAT                     = 0x00000007,
++
++} AddrDepthFormat;
++
++/**
++***************************************************************************************************
++*   AddrColorFormat
++*
++*   @brief
++*       Neutral enum for ColorFormat
++*
++***************************************************************************************************
++*/
++typedef enum _AddrColorFormat
++{
++    ADDR_COLOR_INVALID                            = 0x00000000,
++    ADDR_COLOR_8                                  = 0x00000001,
++    ADDR_COLOR_4_4                                = 0x00000002,
++    ADDR_COLOR_3_3_2                              = 0x00000003,
++    ADDR_COLOR_RESERVED_4                         = 0x00000004,
++    ADDR_COLOR_16                                 = 0x00000005,
++    ADDR_COLOR_16_FLOAT                           = 0x00000006,
++    ADDR_COLOR_8_8                                = 0x00000007,
++    ADDR_COLOR_5_6_5                              = 0x00000008,
++    ADDR_COLOR_6_5_5                              = 0x00000009,
++    ADDR_COLOR_1_5_5_5                            = 0x0000000a,
++    ADDR_COLOR_4_4_4_4                            = 0x0000000b,
++    ADDR_COLOR_5_5_5_1                            = 0x0000000c,
++    ADDR_COLOR_32                                 = 0x0000000d,
++    ADDR_COLOR_32_FLOAT                           = 0x0000000e,
++    ADDR_COLOR_16_16                              = 0x0000000f,
++    ADDR_COLOR_16_16_FLOAT                        = 0x00000010,
++    ADDR_COLOR_8_24                               = 0x00000011,
++    ADDR_COLOR_8_24_FLOAT                         = 0x00000012,
++    ADDR_COLOR_24_8                               = 0x00000013,
++    ADDR_COLOR_24_8_FLOAT                         = 0x00000014,
++    ADDR_COLOR_10_11_11                           = 0x00000015,
++    ADDR_COLOR_10_11_11_FLOAT                     = 0x00000016,
++    ADDR_COLOR_11_11_10                           = 0x00000017,
++    ADDR_COLOR_11_11_10_FLOAT                     = 0x00000018,
++    ADDR_COLOR_2_10_10_10                         = 0x00000019,
++    ADDR_COLOR_8_8_8_8                            = 0x0000001a,
++    ADDR_COLOR_10_10_10_2                         = 0x0000001b,
++    ADDR_COLOR_X24_8_32_FLOAT                     = 0x0000001c,
++    ADDR_COLOR_32_32                              = 0x0000001d,
++    ADDR_COLOR_32_32_FLOAT                        = 0x0000001e,
++    ADDR_COLOR_16_16_16_16                        = 0x0000001f,
++    ADDR_COLOR_16_16_16_16_FLOAT                  = 0x00000020,
++    ADDR_COLOR_RESERVED_33                        = 0x00000021,
++    ADDR_COLOR_32_32_32_32                        = 0x00000022,
++    ADDR_COLOR_32_32_32_32_FLOAT                  = 0x00000023,
++} AddrColorFormat;
++
++/**
++***************************************************************************************************
++*   AddrSurfaceNumber
++*
++*   @brief
++*       Neutral enum for SurfaceNumber
++*
++***************************************************************************************************
++*/
++typedef enum _AddrSurfaceNumber {
++    ADDR_NUMBER_UNORM                             = 0x00000000,
++    ADDR_NUMBER_SNORM                             = 0x00000001,
++    ADDR_NUMBER_USCALED                           = 0x00000002,
++    ADDR_NUMBER_SSCALED                           = 0x00000003,
++    ADDR_NUMBER_UINT                              = 0x00000004,
++    ADDR_NUMBER_SINT                              = 0x00000005,
++    ADDR_NUMBER_SRGB                              = 0x00000006,
++    ADDR_NUMBER_FLOAT                             = 0x00000007,
++} AddrSurfaceNumber;
++
++/**
++***************************************************************************************************
++*   AddrSurfaceSwap
++*
++*   @brief
++*       Neutral enum for SurfaceSwap
++*
++***************************************************************************************************
++*/
++typedef enum _AddrSurfaceSwap {
++    ADDR_SWAP_STD                                 = 0x00000000,
++    ADDR_SWAP_ALT                                 = 0x00000001,
++    ADDR_SWAP_STD_REV                             = 0x00000002,
++    ADDR_SWAP_ALT_REV                             = 0x00000003,
++} AddrSurfaceSwap;
++
++/**
++***************************************************************************************************
++*   AddrHtileBlockSize
++*
++*   @brief
++*       Size of HTILE blocks, valid values are 4 or 8 for now
++***************************************************************************************************
++*/
++typedef enum _AddrHtileBlockSize
++{
++    ADDR_HTILE_BLOCKSIZE_4 = 4,
++    ADDR_HTILE_BLOCKSIZE_8 = 8,
++} AddrHtileBlockSize;
++
++
++/**
++***************************************************************************************************
++*   AddrPipeCfg
++*
++*   @brief
++*       The pipe configuration field specifies both the number of pipes and
++*       how pipes are interleaved on the surface.
++*       The expression of number of pipes, the shader engine tile size, and packer tile size
++*       is encoded in a PIPE_CONFIG register field.
++*       In general the number of pipes usually matches the number of memory channels of the
++*       hardware configuration.
++*       For hw configurations w/ a non-pow2 number of memory channels, it usually matches
++*       the number of ROP units(? TODO: which registers??)
++*       The enum value = hw enum + 1 which is to reserve 0 for requesting default.
++***************************************************************************************************
++*/
++typedef enum _AddrPipeCfg
++{
++    ADDR_PIPECFG_INVALID         = 0,
++    ADDR_PIPECFG_P2              = 1, /// 2 pipes,
++    ADDR_PIPECFG_P4_8x16         = 5, /// 4 pipes,
++    ADDR_PIPECFG_P4_16x16        = 6,
++    ADDR_PIPECFG_P4_16x32        = 7,
++    ADDR_PIPECFG_P4_32x32        = 8,
++    ADDR_PIPECFG_P8_16x16_8x16   = 9, /// 8 pipes
++    ADDR_PIPECFG_P8_16x32_8x16   = 10,
++    ADDR_PIPECFG_P8_32x32_8x16   = 11,
++    ADDR_PIPECFG_P8_16x32_16x16  = 12,
++    ADDR_PIPECFG_P8_32x32_16x16  = 13,
++    ADDR_PIPECFG_P8_32x32_16x32  = 14,
++    ADDR_PIPECFG_P8_32x64_32x32  = 15,
++    ADDR_PIPECFG_P16_32x32_8x16  = 17, /// 16 pipes
++    ADDR_PIPECFG_P16_32x32_16x16 = 18,
++    ADDR_PIPECFG_MAX             = 19,
++} AddrPipeCfg;
++
++/**
++***************************************************************************************************
++* AddrTileType
++*
++*   @brief
++*       Neutral enums that specifies micro tile type (MICRO_TILE_MODE)
++***************************************************************************************************
++*/
++typedef enum _AddrTileType
++{
++    ADDR_DISPLAYABLE        = 0,    ///< Displayable tiling
++    ADDR_NON_DISPLAYABLE    = 1,    ///< Non-displayable tiling, a.k.a thin micro tiling
++    ADDR_DEPTH_SAMPLE_ORDER = 2,    ///< Same as non-displayable plus depth-sample-order
++    ADDR_ROTATED            = 3,    ///< Rotated displayable tiling
++    ADDR_THICK              = 4,    ///< Thick micro-tiling, only valid for THICK and XTHICK
++} AddrTileType;
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//
++//  Type definitions: short system-independent names for address library types
++//
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++#if !defined(__APPLE__)
++
++#ifndef BOOL_32        // no bool type in C
++/// @brief Boolean type, since none is defined in C
++/// @ingroup type
++#define BOOL_32 int
++#endif
++
++#ifndef INT_32
++#define INT_32  int
++#endif
++
++#ifndef UINT_32
++#define UINT_32 unsigned int
++#endif
++
++#ifndef INT_16
++#define INT_16  short
++#endif
++
++#ifndef UINT_16
++#define UINT_16 unsigned short
++#endif
++
++#ifndef INT_8
++#define INT_8   char
++#endif
++
++#ifndef UINT_8
++#define UINT_8  unsigned char
++#endif
++
++#ifndef NULL
++#define NULL 0
++#endif
++
++#ifndef TRUE
++#define TRUE 1
++#endif
++
++#ifndef FALSE
++#define FALSE 0
++#endif
++
++//
++//  64-bit integer types depend on the compiler
++//
++#if defined( __GNUC__ ) || defined( __WATCOMC__ )
++#define INT_64   long long
++#define UINT_64  unsigned long long
++
++#elif defined( _WIN32 )
++#define INT_64   __int64
++#define UINT_64  unsigned __int64
++
++#else
++#error Unsupported compiler and/or operating system for 64-bit integers
++
++/// @brief 64-bit signed integer type (compiler dependent)
++/// @ingroup type
++///
++/// The addrlib defines a 64-bit signed integer type for either
++/// Gnu/Watcom compilers (which use the first syntax) or for
++/// the Windows VCC compiler (which uses the second syntax).
++#define INT_64  long long OR __int64
++
++/// @brief 64-bit unsigned integer type (compiler dependent)
++/// @ingroup type
++///
++/// The addrlib defines a 64-bit unsigned integer type for either
++/// Gnu/Watcom compilers (which use the first syntax) or for
++/// the Windows VCC compiler (which uses the second syntax).
++///
++#define UINT_64  unsigned long long OR unsigned __int64
++#endif
++
++#endif // #if !defined(__APPLE__)
++
++//  ADDR64X is used to print addresses in hex form on both Windows and Linux
++//
++#if defined( __GNUC__ ) || defined( __WATCOMC__ )
++#define ADDR64X "llx"
++#define ADDR64D "lld"
++
++#elif defined( _WIN32 )
++#define ADDR64X "I64x"
++#define ADDR64D "I64d"
++
++#else
++#error Unsupported compiler and/or operating system for 64-bit integers
++
++/// @brief Addrlib device address 64-bit printf tag  (compiler dependent)
++/// @ingroup type
++///
++/// This allows printf to display an ADDR_64 for either the Windows VCC compiler
++/// (which used this value) or the Gnu/Watcom compilers (which use "llx").
++/// An example of use is printf("addr 0x%"ADDR64X"\n", address);
++///
++#define ADDR64X "llx" OR "I64x"
++#define ADDR64D "lld" OR "I64d"
++#endif
++
++
++/// @brief Union for storing a 32-bit float or 32-bit integer
++/// @ingroup type
++///
++/// This union provides a simple way to convert between a 32-bit float
++/// and a 32-bit integer. It also prevents the compiler from producing
++/// code that alters NaN values when assigning or copying floats.
++/// Therefore, all address library routines that pass or return 32-bit
++/// floating point data do so by passing or returning a FLT_32.
++///
++typedef union {
++    INT_32   i;
++    UINT_32  u;
++    float    f;
++} ADDR_FLT_32;
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//
++//  Macros for controlling linking and building on multiple systems
++//
++///////////////////////////////////////////////////////////////////////////////////////////////////
++#if defined(_MSC_VER)
++#if defined(va_copy)
++#undef va_copy  //redefine va_copy to support VC2013
++#endif
++#endif
++
++#if !defined(va_copy)
++#define va_copy(dst, src) \
++    ((void) memcpy(&(dst), &(src), sizeof(va_list)))
++#endif
++
++#endif // __ADDR_TYPES_H__
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrcommon.h b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrcommon.h
+new file mode 100644
+index 0000000..f996c9a
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrcommon.h
+@@ -0,0 +1,558 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrcommon.h
++* @brief Contains the helper function and constants
++***************************************************************************************************
++*/
++
++#ifndef __ADDR_COMMON_H__
++#define __ADDR_COMMON_H__
++
++#include "addrinterface.h"
++
++
++// ADDR_LNX_KERNEL_BUILD is for internal build
++// Moved from addrinterface.h so __KERNEL__ is not needed any more
++#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
++    #include "lnx_common_defs.h" // ported from cmmqs
++#elif !defined(__APPLE__)
++    #include <stdlib.h>
++    #include <string.h>
++#endif
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++// Common constants
++///////////////////////////////////////////////////////////////////////////////////////////////////
++static const UINT_32 MicroTileWidth      = 8;       ///< Micro tile width, for 1D and 2D tiling
++static const UINT_32 MicroTileHeight     = 8;       ///< Micro tile height, for 1D and 2D tiling
++static const UINT_32 ThickTileThickness  = 4;       ///< Micro tile thickness, for THICK modes
++static const UINT_32 XThickTileThickness = 8;       ///< Extra thick tiling thickness
++static const UINT_32 PowerSaveTileBytes  = 64;      ///< Number of bytes per tile for power save 64
++static const UINT_32 CmaskCacheBits      = 1024;    ///< Number of bits for CMASK cache
++static const UINT_32 CmaskElemBits       = 4;       ///< Number of bits for CMASK element
++static const UINT_32 HtileCacheBits      = 16384;   ///< Number of bits for HTILE cache 512*32
++
++static const UINT_32 MicroTilePixels     = MicroTileWidth * MicroTileHeight;
++
++static const INT_32 TileIndexInvalid        = TILEINDEX_INVALID;
++static const INT_32 TileIndexLinearGeneral  = TILEINDEX_LINEAR_GENERAL;
++static const INT_32 TileIndexNoMacroIndex   = -3;
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++// Common macros
++///////////////////////////////////////////////////////////////////////////////////////////////////
++#define BITS_PER_BYTE 8
++#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE )
++#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE )
++
++/// Helper macros to select a single bit from an int (undefined later in section)
++#define _BIT(v,b)      (((v) >> (b) ) & 1)
++
++/**
++***************************************************************************************************
++* @brief Enums to identify AddrLib type
++***************************************************************************************************
++*/
++enum AddrLibClass
++{
++    BASE_ADDRLIB = 0x0,
++    R600_ADDRLIB = 0x6,
++    R800_ADDRLIB = 0x8,
++    SI_ADDRLIB   = 0xa,
++    CI_ADDRLIB   = 0xb,
++};
++
++/**
++***************************************************************************************************
++* AddrChipFamily
++*
++*   @brief
++*       Neutral enums that specifies chip family.
++*
++***************************************************************************************************
++*/
++enum AddrChipFamily
++{
++    ADDR_CHIP_FAMILY_IVLD,    ///< Invalid family
++    ADDR_CHIP_FAMILY_R6XX,
++    ADDR_CHIP_FAMILY_R7XX,
++    ADDR_CHIP_FAMILY_R8XX,
++    ADDR_CHIP_FAMILY_NI,
++    ADDR_CHIP_FAMILY_SI,
++    ADDR_CHIP_FAMILY_CI,
++    ADDR_CHIP_FAMILY_VI,
++};
++
++/**
++***************************************************************************************************
++* ADDR_CONFIG_FLAGS
++*
++*   @brief
++*       This structure is used to set addr configuration flags.
++***************************************************************************************************
++*/
++union ADDR_CONFIG_FLAGS
++{
++    struct
++    {
++        /// Clients do not need to set these flags except forceLinearAligned.
++        /// These flags are set up by AddrLib inside thru AddrInitGlobalParamsFromRegister
++        UINT_32 optimalBankSwap        : 1;    ///< New bank tiling for RV770 only
++        UINT_32 noCubeMipSlicesPad     : 1;    ///< Disables faces padding for cubemap mipmaps
++        UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
++                                               ///  output structure
++        UINT_32 ignoreTileInfo         : 1;    ///< Don't use tile info structure
++        UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
++        UINT_32 useCombinedSwizzle     : 1;    ///< Use combined swizzle
++        UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
++        UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
++        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes automatically for base level
++        UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
++        UINT_32 reserved               : 22;   ///< Reserved bits for future use
++    };
++
++    UINT_32 value;
++};
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++// Platform specific debug break defines
++///////////////////////////////////////////////////////////////////////////////////////////////////
++#if DEBUG
++    #if defined(__GNUC__)
++        #define ADDR_DBG_BREAK()
++    #elif defined(__APPLE__)
++        #define ADDR_DBG_BREAK()    { IOPanic("");}
++    #else
++        #define ADDR_DBG_BREAK()    { __debugbreak(); }
++    #endif
++#else
++    #define ADDR_DBG_BREAK()
++#endif
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++// Debug assertions used in AddrLib
++///////////////////////////////////////////////////////////////////////////////////////////////////
++#if DEBUG
++#define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); }
++#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
++#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
++#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented");
++#else //DEBUG
++#define ADDR_ASSERT(__e)
++#define ADDR_ASSERT_ALWAYS()
++#define ADDR_UNHANDLED_CASE()
++#define ADDR_NOT_IMPLEMENTED()
++#endif //DEBUG
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++// Debug print macro from legacy address library
++///////////////////////////////////////////////////////////////////////////////////////////////////
++#if DEBUG
++
++#define ADDR_PRNT(a)    AddrObject::DebugPrint a
++
++/// @brief Macro for reporting informational messages
++/// @ingroup util
++///
++/// This macro optionally prints an informational message to stdout.
++/// The first parameter is a condition -- if it is true, nothing is done.
++/// The second parameter MUST be a parenthesis-enclosed list of arguments,
++/// starting with a string. This is passed to printf() or an equivalent
++/// in order to format the informational message. For example,
++/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3".
++///
++#define ADDR_INFO(cond, a)         \
++{ if (!(cond)) { ADDR_PRNT(a); } }
++
++
++/// @brief Macro for reporting error warning messages
++/// @ingroup util
++///
++/// This macro optionally prints an error warning message to stdout,
++/// followed by the file name and line number where the macro was called.
++/// The first parameter is a condition -- if it is true, nothing is done.
++/// The second parameter MUST be a parenthesis-enclosed list of arguments,
++/// starting with a string. This is passed to printf() or an equivalent
++/// in order to format the informational message. For example,
++/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by
++/// a second line with the file name and line number.
++///
++#define ADDR_WARN(cond, a)         \
++{ if (!(cond))                     \
++  { ADDR_PRNT(a);                  \
++    ADDR_PRNT(("  WARNING in file %s, line %d\n", __FILE__, __LINE__)); \
++} }
++
++
++/// @brief Macro for reporting fatal error conditions
++/// @ingroup util
++///
++/// This macro optionally stops execution of the current routine
++/// after printing an error warning message to stdout,
++/// followed by the file name and line number where the macro was called.
++/// The first parameter is a condition -- if it is true, nothing is done.
++/// The second parameter MUST be a parenthesis-enclosed list of arguments,
++/// starting with a string. This is passed to printf() or an equivalent
++/// in order to format the informational message. For example,
++/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by
++/// a second line with the file name and line number, then stops execution.
++///
++#define ADDR_EXIT(cond, a)         \
++{ if (!(cond))                     \
++  { ADDR_PRNT(a); ADDR_DBG_BREAK();\
++} }
++
++#else // DEBUG
++
++#define ADDRDPF 1 ? (void)0 : (void)
++
++#define ADDR_PRNT(a)
++
++#define ADDR_DBG_BREAK()
++
++#define ADDR_INFO(cond, a)
++
++#define ADDR_WARN(cond, a)
++
++#define ADDR_EXIT(cond, a)
++
++#endif // DEBUG
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++// Misc helper functions
++////////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   XorReduce
++*
++*   @brief
++*       XOR together the low numberOfBits bits of x (bit 0 is always included in the result).
++***************************************************************************************************
++*/
++static inline UINT_32 XorReduce(
++    UINT_32 x,
++    UINT_32 numberOfBits)
++{
++    UINT_32 i;
++    UINT_32 result = x & 1;
++
++    for (i=1; i<numberOfBits; i++)
++    {
++        result ^= ((x>>i) & 1);
++    }
++
++    return result;
++}
++
++/**
++***************************************************************************************************
++*   IsPow2
++*
++*   @brief
++*       Check if dim (UINT_32) is a power of 2; dim must be non-zero (0 also reports as pow 2)
++***************************************************************************************************
++*/
++static inline UINT_32 IsPow2(
++    UINT_32 dim)        ///< [in] dimension of miplevel
++{
++    ADDR_ASSERT(dim > 0);
++    return !(dim & (dim - 1));
++}
++
++/**
++***************************************************************************************************
++*   IsPow2
++*
++*   @brief
++*       Check if dim (UINT_64) is a power of 2; dim must be non-zero (0 also reports as pow 2)
++***************************************************************************************************
++*/
++static inline UINT_64 IsPow2(
++    UINT_64 dim)        ///< [in] dimension of miplevel
++{
++    ADDR_ASSERT(dim > 0);
++    return !(dim & (dim - 1));
++}
++
++/**
++***************************************************************************************************
++*   PowTwoAlign
++*
++*   @brief
++*       Round UINT_32 "x" up to a multiple of "align"; "align" should be a power of 2
++***************************************************************************************************
++*/
++static inline UINT_32 PowTwoAlign(
++    UINT_32 x,
++    UINT_32 align)
++{
++    //
++    // Assert that align is a power of two.
++    //
++    ADDR_ASSERT(IsPow2(align));
++    return (x + (align - 1)) & (~(align - 1));
++}
++
++/**
++***************************************************************************************************
++*   ByteAlign
++*
++*   @brief
++*       Align UINT_64 "x" to "align" alignment, "align" should be power of 2
++***************************************************************************************************
++*/
++static inline UINT_64 PowTwoAlign(
++    UINT_64 x,          ///< [in] value to round up
++    UINT_64 align)      ///< [in] alignment; asserted to be a power of two
++{
++    //
++    // Assert that align is a power of two (the mask trick below relies on it).
++    //
++    ADDR_ASSERT(IsPow2(align));
++    return (x + (align - 1)) & (~(align - 1)); // add align-1, then clear the low bits
++}
++
++/**
++***************************************************************************************************
++*   Min
++*
++*   @brief
++*       Get the min value between two unsigned values
++***************************************************************************************************
++*/
++static inline UINT_32 Min(
++    UINT_32 value1,     ///< [in] first unsigned operand
++    UINT_32 value2)     ///< [in] second unsigned operand
++{
++    return ((value1 < (value2)) ? (value1) : value2); // value2 is returned on a tie
++}
++
++/**
++***************************************************************************************************
++*   Min
++*
++*   @brief
++*       Get the min value between two signed values
++***************************************************************************************************
++*/
++static inline INT_32 Min(
++    INT_32 value1,      ///< [in] first signed operand
++    INT_32 value2)      ///< [in] second signed operand
++{
++    return ((value1 < (value2)) ? (value1) : value2); // value2 is returned on a tie
++}
++
++/**
++***************************************************************************************************
++*   Max
++*
++*   @brief
++*       Get the max value between two unsigned values
++***************************************************************************************************
++*/
++static inline UINT_32 Max(
++    UINT_32 value1,     ///< [in] first unsigned operand
++    UINT_32 value2)     ///< [in] second unsigned operand
++{
++    return ((value1 > (value2)) ? (value1) : value2); // value2 is returned on a tie
++}
++
++/**
++***************************************************************************************************
++*   Max
++*
++*   @brief
++*       Get the max value between two signed values
++***************************************************************************************************
++*/
++static inline INT_32 Max(
++    INT_32 value1,      ///< [in] first signed operand
++    INT_32 value2)      ///< [in] second signed operand
++{
++    return ((value1 > (value2)) ? (value1) : value2); // value2 is returned on a tie
++}
++
++/**
++***************************************************************************************************
++*   NextPow2
++*
++*   @brief
++*       Round dim up to a power of two (yields 1 for dim of 0 or 1; capped at 0x80000000)
++***************************************************************************************************
++*/
++static inline UINT_32 NextPow2(
++    UINT_32 dim)        ///< [in] dimension of miplevel
++{
++    UINT_32 newDim;
++
++    newDim = 1;         // smallest candidate; also the result for dim == 0 or 1
++
++    if (dim > 0x7fffffff) // guard: above 0x80000000 the doubling loop would wrap to 0
++    {
++        ADDR_ASSERT_ALWAYS();
++        newDim = 0x80000000;
++    }
++    else
++    {
++        while (newDim < dim) // double until newDim is no longer below dim
++        {
++            newDim <<= 1;
++        }
++    }
++
++    return newDim;
++}
++
++/**
++***************************************************************************************************
++*   Log2
++*
++*   @brief
++*       Compute log of base 2
++***************************************************************************************************
++*/
++static inline UINT_32 Log2(
++    UINT_32 x)      ///< [in] the value should calculate log based 2
++{
++    UINT_32 y;
++
++    //
++    // Assert that x is a power of two; other inputs yield the floor of log2(x).
++    //
++    ADDR_ASSERT(IsPow2(x));
++
++    y = 0;
++    while (x > 1)   // count how many times x can be halved before reaching 1
++    {
++        x >>= 1;
++        y++;
++    }
++
++    return y;
++}
++
++/**
++***************************************************************************************************
++*   QLog2
++*
++*   @brief
++*       Compute log of base 2 quickly (<= 16)
++***************************************************************************************************
++*/
++static inline UINT_32 QLog2(
++    UINT_32 x)      ///< [in] the value should calculate log based 2
++{
++    ADDR_ASSERT(x <= 16);
++
++    UINT_32 y = 0;  // also the value returned when x hits the default case
++
++    switch (x)      // direct lookup for the five supported powers of two
++    {
++        case 1:
++            y = 0;
++            break;
++        case 2:
++            y = 1;
++            break;
++        case 4:
++            y = 2;
++            break;
++        case 8:
++            y = 3;
++            break;
++        case 16:
++            y = 4;
++            break;
++        default:    // x is not one of 1, 2, 4, 8, 16
++            ADDR_ASSERT_ALWAYS();
++    }
++
++    return y;
++}
++
++/**
++***************************************************************************************************
++*   SafeAssign
++*
++*   @brief
++*       NULL pointer safe assignment
++***************************************************************************************************
++*/
++static inline VOID SafeAssign(
++    UINT_32*    pLVal,  ///< [in] Pointer to left val
++    UINT_32     rVal)   ///< [in] Right value
++{
++    if (pLVal)          // skip the store when the caller passed NULL
++    {
++        *pLVal = rVal;
++    }
++}
++
++/**
++***************************************************************************************************
++*   SafeAssign
++*
++*   @brief
++*       NULL pointer safe assignment for 64bit values
++***************************************************************************************************
++*/
++static inline VOID SafeAssign(
++    UINT_64*    pLVal,  ///< [in] Pointer to left val
++    UINT_64     rVal)   ///< [in] Right value
++{
++    if (pLVal)          // skip the store when the caller passed NULL
++    {
++        *pLVal = rVal;
++    }
++}
++
++/**
++***************************************************************************************************
++*   SafeAssign
++*
++*   @brief
++*       NULL pointer safe assignment for AddrTileMode
++***************************************************************************************************
++*/
++static inline VOID SafeAssign(
++    AddrTileMode*    pLVal, ///< [in] Pointer to left val
++    AddrTileMode     rVal)  ///< [in] Right value
++{
++    if (pLVal)              // skip the store when the caller passed NULL
++    {
++        *pLVal = rVal;
++    }
++}
++
++#endif // __ADDR_COMMON_H__
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.cpp
+new file mode 100644
+index 0000000..eb1b7de
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.cpp
+@@ -0,0 +1,1678 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrelemlib.cpp
++* @brief Contains the class implementation for element/pixel related functions
++***************************************************************************************************
++*/
++
++#include "addrelemlib.h"
++#include "addrlib.h"
++
++
++/**
++***************************************************************************************************
++*   AddrElemLib::AddrElemLib
++*
++*   @brief
++*       constructor
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++AddrElemLib::AddrElemLib(
++    AddrLib* const pAddrLib) :  ///< [in] Parent addrlib instance pointer
++    AddrObject(pAddrLib->GetClient()),
++    m_pAddrLib(pAddrLib)
++{
++    switch (m_pAddrLib->GetAddrChipFamily()) // choose depth planar type and fp16 export flag
++    {
++        case ADDR_CHIP_FAMILY_R6XX:
++            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
++            m_fp16ExportNorm = 0;   // R6XX is the only family with this flag cleared
++            break;
++        case ADDR_CHIP_FAMILY_R7XX:
++            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
++            m_fp16ExportNorm = 1;
++            break;
++        case ADDR_CHIP_FAMILY_R8XX:
++        case ADDR_CHIP_FAMILY_NI: // Same as 8xx
++            m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
++            m_fp16ExportNorm = 1;
++            break;
++        default:                  // every other family gets the same values as R8XX/NI
++            m_fp16ExportNorm = 1;
++            m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
++    }
++
++    m_configFlags.value = 0;      // zero the raw config flags value
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::~AddrElemLib
++*
++*   @brief
++*       destructor
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++AddrElemLib::~AddrElemLib()    // empty body: no explicit cleanup is performed here
++{
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::Create
++*
++*   @brief
++*       Creates and initializes AddrElemLib object.
++*
++*   @return
++*       Returns pointer to the new AddrElemLib object; NULL if pAddrLib is NULL.
++***************************************************************************************************
++*/
++AddrElemLib* AddrElemLib::Create(
++    const AddrLib* const        pAddrLib)   ///< [in] Pointer of parent AddrLib instance
++{
++    AddrElemLib* pElemLib = NULL;   // returned unchanged when pAddrLib is NULL
++
++    if (pAddrLib)
++    {
++        // The constructor takes a non-const AddrLib*, hence the const_cast.
++        pElemLib = new(pAddrLib->GetClient()) AddrElemLib(const_cast<AddrLib* const>(pAddrLib));
++    }
++
++    return pElemLib;
++}
++
++/**************************************************************************************************
++*   AddrElemLib::Flt32sToInt32s
++*
++*   @brief
++*       Convert a ADDR_FLT_32 value to Int32 value
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::Flt32sToInt32s(
++    ADDR_FLT_32     value,      ///< [in] ADDR_FLT_32 value
++    UINT_32         bits,       ///< [in] number of bits in value
++    AddrNumberType  numberType, ///< [in] the type of number
++    UINT_32*        pResult)    ///< [out] Int32 value; not written for zero-bit/invalid types
++{
++    UINT_8 round = 128;    //ADDR_ROUND_BY_HALF
++    UINT_32 uscale;
++    UINT_32 sign;
++
++    //convert each component to an INT_32
++    switch ( numberType )
++    {
++        case ADDR_NO_NUMBER:    //fall through
++        case ADDR_ZERO:         //fall through
++        case ADDR_ONE:          //fall through
++        case ADDR_EPSILON:      //fall through
++            return;        // these are zero-bit components, so don't set result
++
++        case ADDR_UINT_BITS:            // unsigned integer bit field, clamped to range
++            uscale = (bits < 32) ? ((1u<<bits) - 1) : 0xFFFFFFFF; // no undefined 32-bit shift
++            if (bits == 32)               // special case unsigned 32-bit int
++            {
++                *pResult = value.i;
++            }
++            else
++            {
++                if ((value.i < 0) || (value.u > uscale))
++                {
++                    *pResult = uscale;
++                }
++                else
++                {
++                    *pResult = value.i;
++                }
++            }
++            return;        // both branches are done; must not fall into the UNORM cases below
++
++        // The algorithm used in the DB and TX differs at one value for 24-bit unorms
++        case ADDR_UNORM_R6XXDB:        // unsigned repeating fraction
++            if ((bits==24) && (value.i == 0x33000000))
++            {
++                *pResult = 1;
++                return;
++            }              // Else treat like ADDR_UNORM_R6XX (intentional fall through)
++
++        case ADDR_UNORM_R6XX:            // unsigned repeating fraction
++            if (value.f <= 0)
++            {
++                *pResult = 0;            // first clamp to [0..1]
++            }
++            else
++            {
++                if (value.f >= 1)
++                {
++                     *pResult = (1<<bits) - 1;
++                }
++                else
++                {
++                    if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF)
++                    {
++                        *pResult = 0;                        // NaN, so force to 0
++                    }
++
++                    #if 0 // floating point version for documentation
++                    else
++                    {
++                        FLOAT f = value.f * ((1<<bits) - 1);
++                        *pResult = static_cast<INT_32>(f + (round/256.0f));
++                    }
++                    #endif
++                    else
++                    {
++                        ADDR_FLT_32 scaled;
++                        ADDR_FLT_32 shifted;
++                        UINT_64 truncated, rounded;
++                        UINT_32 altShift;
++                        UINT_32 mask = (1 << bits) - 1;
++                        UINT_32 half = 1 << (bits - 1);
++                        UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000;
++                        UINT_64 temp = mant24 - (mant24>>bits) -
++                            static_cast<INT_32>((mant24 & mask) > half);
++                        UINT_32 exp8 = value.i >> 23;
++                        UINT_32 shift = 126 - exp8 + 24 - bits;
++                        UINT_64 final;
++
++                        if (shift >= 32) // This is zero, even with maximum dither add
++                        {
++                            final = 0;
++                        }
++                        else
++                        {
++                            final = ((temp<<8) + (static_cast<UINT_64>(round)<<shift)) >> (shift+8);
++                        }
++                        //ADDR_EXIT( *pResult == final,
++                        //    ("Float %x converted to %d-bit Unorm %x != bitwise %x",
++                        //     value.u, bits, (UINT_32)*pResult, (UINT_32)final) );
++                        if (final > mask)
++                        {
++                            final = mask;
++                        }
++
++                        scaled.f  = value.f * ((1<<bits) - 1);
++                        shifted.f = (scaled.f * 256);
++                        truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8;
++                        altShift  = 126 + 24 + 8 - ((shifted.i>>23)&0xFF);
++                        truncated = (altShift > 60) ? 0 : truncated >> altShift;
++                        rounded   = static_cast<INT_32>((round + truncated) >> 8);
++                        //if (rounded > ((1<<bits) - 1))
++                        //    rounded = ((1<<bits) - 1);
++                        *pResult = static_cast<INT_32>(rounded); //(INT_32)final;
++                    }
++                }
++            }
++
++            return;
++
++        case ADDR_S8FLOAT32:    // 32-bit IEEE float, passes through NaN values
++            *pResult = value.i;
++            return;
++
++        // @@ FIX ROUNDING in this code, fix the denorm case
++        case ADDR_U4FLOATC:         // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
++            sign = (value.i >> 31) & 1;
++            if ((value.i&0x7F800000) == 0x7F800000)    // If NaN or INF:
++            {
++                if ((value.i&0x007FFFFF) != 0)             // then if NaN
++                {
++                    *pResult = 0;                       // return 0
++                }
++                else
++                {
++                    *pResult = (sign)?0:0xF00000;           // else +INF->+1, -INF->0
++                }
++                return;
++            }
++            if (value.f <= 0)
++            {
++                *pResult = 0;
++            }
++            else
++            {
++                if (value.f>=1)
++                {
++                    *pResult = 0xF << (bits-4);
++                }
++                else
++                {
++                    if ((value.i>>23) > 112 )
++                    {
++                        // 24-bit float: normalized
++                        // value.i += 1 << (22-bits+4);
++                        // round the IEEE mantissa to mantissa size
++                        // @@ NOTE: add code to support rounding
++                        value.u &= 0x7FFFFFF;             // mask off high 4 exponent bits
++                        *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
++                    }
++                    else
++                    {
++                        // 24-bit float: denormalized
++                        value.f = value.f / (1<<28) / (1<<28);
++                        value.f = value.f / (1<<28) / (1<<28);    // convert to IEEE denorm
++                        // value.i += 1 << (22-bits+4);
++                        // round the IEEE mantissa to mantissa size
++                        // @@ NOTE: add code to support rounding
++                        *pResult = value.i >> (23-bits+4);    // shift off unused mantissa bits
++                    }
++                }
++            }
++
++            return;
++
++        default:                    // invalid number mode; *pResult is left untouched
++            //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
++            break;
++
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::Int32sToPixel
++*
++*   @brief
++*       Pack 32-bit integer values into an uncompressed pixel,
++*       in the proper order
++*
++*   @return
++*       N/A
++*
++*   @note
++*       This entry point packs four 32-bit integer values into
++*       an uncompressed pixel. The pixel values are specified in
++*       standard order, e.g. depth/stencil. This routine asserts
++*       if called on compressed pixel.
++***************************************************************************************************
++*/
++VOID AddrElemLib::Int32sToPixel(
++    UINT_32              numComps,      ///< [in] number of components
++    UINT_32*             pComps,        ///< [in] components
++    UINT_32*             pCompBits,     ///< [in] total bits in each component
++    UINT_32*             pCompStart,    ///< [in] the first bit position of each component
++    ADDR_COMPONENT_FLAGS properties,    ///< [in] properties about byteAligned, exportNorm
++    UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
++    UINT_8*              pPixel)        ///< [out] a depth/stencil pixel value
++{
++    UINT_32 i;
++    UINT_32 j;
++    UINT_32 start;
++    UINT_32 size;
++    UINT_32 byte;
++    UINT_32 value = 0;
++    UINT_32 compMask;
++    UINT_32 elemMask=0;
++    UINT_32 elementXor = 0;  // address xor when reading bytes from elements
++
++
++    // @@ NOTE: assert if called on a compressed format!
++
++    if (properties.byteAligned)    // Components are all byte-sized
++    {
++        for (i = 0; i < numComps; i++)        // Then for each component
++        {
++            // Copy the bytes of the component into the element
++            start = pCompStart[i] / 8;
++            size  = pCompBits[i]  / 8;
++            for (j = 0; j < size; j++)
++            {
++                pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j));
++            }
++        }
++    }
++    else                        // Element is 32-bits or less, components are bit fields
++    {
++        // First, extract each component in turn and combine it into a 32-bit value
++        for (i = 0; i < numComps; i++)
++        {
++            compMask = (1 << pCompBits[i]) - 1;
++            elemMask |= compMask << pCompStart[i];
++            value |= (pComps[i] & compMask) << pCompStart[i];
++        }
++
++        // Next, copy the masked value into the element; bits outside elemMask are preserved
++        size = (resultBits + 7) / 8;
++        for (i = 0; i < size; i++)
++        {
++            byte = pPixel[i^elementXor] & ~(elemMask >> (8*i));
++            pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i)));
++        }
++    }
++}
++
++/**
++***************************************************************************************************
++*   Flt32ToDepthPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a depth/stencil pixel value
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::Flt32ToDepthPixel(
++    AddrDepthFormat     format,     ///< [in] Depth format
++    const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
++    UINT_8*             pPixel      ///< [out] depth pixel value
++    ) const
++{
++    UINT_32 i;
++    UINT_32 values[2] = {0, 0};         // zeroed: Flt32sToInt32s leaves some types unwritten
++    ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm
++    UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
++
++    ADDR_PIXEL_FORMATINFO fmt;          // NOTE(review): assumed fully set by the call below
++
++    // get type for each component
++    PixGetDepthCompInfo(format, &fmt);
++
++    //initialize properties
++    properties.byteAligned = TRUE;
++    properties.exportNorm  = TRUE;
++    properties.floatComp   = FALSE;
++
++    //set properties and result bits
++    for (i = 0; i < 2; i++)
++    {
++        if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7))
++        {
++            properties.byteAligned = FALSE;
++        }
++
++        if (resultBits < fmt.compStart[i] + fmt.compBit[i])
++        {
++            resultBits = fmt.compStart[i] + fmt.compBit[i];
++        }
++
++        // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
++        if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED)
++        {
++            properties.exportNorm = FALSE;
++        }
++
++        // Mark if there are any floating point components
++        if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) )
++        {
++            properties.floatComp = TRUE;
++        }
++    }
++
++    // Convert the two input floats to integer values
++    for (i = 0; i < 2; i++)
++    {
++        Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]);
++    }
++
++    // Then pack the two integer components, in the proper order
++    Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel );
++
++}
++
++/**
++***************************************************************************************************
++*   Flt32ToColorPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::Flt32ToColorPixel(
++    AddrColorFormat     format,     ///< [in] Color format
++    AddrSurfaceNumber   surfNum,    ///< [in] Surface number
++    AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
++    const ADDR_FLT_32   comps[4],   ///< [in] four components of color
++    UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
++    ) const
++{
++    ADDR_PIXEL_FORMATINFO pixelInfo;
++
++    UINT_32 i;
++    UINT_32 values[4] = {0, 0, 0, 0};   // zeroed: Flt32sToInt32s leaves some types unwritten
++    ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm
++    UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
++
++    memset(&pixelInfo, 0, sizeof(ADDR_PIXEL_FORMATINFO));
++
++    PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo);
++
++    //initialize properties
++    properties.byteAligned = TRUE;
++    properties.exportNorm  = TRUE;
++    properties.floatComp   = FALSE;
++
++    //set properties and result bits
++    for (i = 0; i < 4; i++)
++    {
++        if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) )
++        {
++            properties.byteAligned = FALSE;
++        }
++
++        if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i])
++        {
++            resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i];
++        }
++
++        if (m_fp16ExportNorm)
++        {
++            // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
++            // or if it's not FP and <=16 bits
++            if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED))
++                && (pixelInfo.numType[i] !=ADDR_U4FLOATC))
++            {
++                properties.exportNorm = FALSE;
++            }
++        }
++        else
++        {
++            // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
++            if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED)
++            {
++                properties.exportNorm = FALSE;
++            }
++        }
++
++        // Mark if there are any floating point components
++        if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) ||
++             (pixelInfo.numType[i] >= ADDR_S8FLOAT) )
++        {
++            properties.floatComp = TRUE;
++        }
++    }
++
++    // Convert the four input floats to integer values
++    for (i = 0; i < 4; i++)
++    {
++        Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]);
++    }
++
++    // Then pack the four integer components, in the proper order
++    Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0],
++                  properties, resultBits, pPixel);
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::GetCompType
++*
++*   @brief
++*       Fill per component info
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrElemLib::GetCompType(
++    AddrColorFormat         format,     ///< [in] surface format
++    AddrSurfaceNumber       numType,  ///< [in] number type
++    ADDR_PIXEL_FORMATINFO*  pInfo)       ///< [in][out] reads compBit[], writes numType[]
++{
++    BOOL_32 handled = FALSE;    // set once a format-specific case has filled numType[]
++
++    // Floating point formats override the number format
++    switch (format)
++    {
++        case ADDR_COLOR_16_FLOAT:            // fall through for all pure floating point format
++        case ADDR_COLOR_16_16_FLOAT:
++        case ADDR_COLOR_16_16_16_16_FLOAT:
++        case ADDR_COLOR_32_FLOAT:
++        case ADDR_COLOR_32_32_FLOAT:
++        case ADDR_COLOR_32_32_32_32_FLOAT:
++        case ADDR_COLOR_10_11_11_FLOAT:
++        case ADDR_COLOR_11_11_10_FLOAT:
++            numType = ADDR_NUMBER_FLOAT;
++            break;
++            // Special handling for the depth formats
++        case ADDR_COLOR_8_24:                // fall through for these 2 similar format
++        case ADDR_COLOR_24_8:
++            for (UINT_32 c = 0; c < 4; c++)
++            {
++                if (pInfo->compBit[c] == 8)
++                {
++                    pInfo->numType[c] = ADDR_UINT_BITS;
++                }
++                else if (pInfo->compBit[c]  == 24)
++                {
++                    pInfo->numType[c] = ADDR_UNORM_R6XX;
++                }
++                else
++                {
++                    pInfo->numType[c] = ADDR_NO_NUMBER;
++                }
++            }
++            handled = TRUE;
++            break;
++        case ADDR_COLOR_8_24_FLOAT:          // fall through for these 3 similar format
++        case ADDR_COLOR_24_8_FLOAT:
++        case ADDR_COLOR_X24_8_32_FLOAT:
++            for (UINT_32 c = 0; c < 4; c++)
++            {
++                if (pInfo->compBit[c] == 8)
++                {
++                    pInfo->numType[c] = ADDR_UINT_BITS;
++                }
++                else if (pInfo->compBit[c] == 24)
++                {
++                    pInfo->numType[c] = ADDR_U4FLOATC;
++                }
++                else if (pInfo->compBit[c] == 32)
++                {
++                    pInfo->numType[c] = ADDR_S8FLOAT32;
++                }
++                else
++                {
++                    pInfo->numType[c] = ADDR_NO_NUMBER;
++                }
++            }
++            handled = TRUE;
++            break;
++        default:
++            break;
++    }
++
++    if (!handled)
++    {
++        for (UINT_32 c = 0; c < 4; c++)
++        {
++            // Assign a number type for each component
++            AddrSurfaceNumber cnum;
++
++            // First handle default component values
++            if (pInfo->compBit[c] == 0)
++            {
++                if (c < 3)
++                {
++                    pInfo->numType[c] = ADDR_ZERO;      // Default is zero for RGB
++                }
++                else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
++                {
++                    pInfo->numType[c] = ADDR_EPSILON;   // Alpha INT_32 bits default is 0x01
++                }
++                else
++                {
++                    pInfo->numType[c] = ADDR_ONE;       // Alpha normal default is float 1.0
++                }
++                continue;
++            }
++            // Now handle small components
++            else if (pInfo->compBit[c] == 1)
++            {
++                if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
++                {
++                    cnum = ADDR_NUMBER_UINT;
++                }
++                else
++                {
++                    cnum = ADDR_NUMBER_UNORM;
++                }
++            }
++            else
++            {
++                cnum = numType;
++            }
++
++            // If no default, set the number type from num, compbits, and architecture
++            switch (cnum)
++            {
++                case ADDR_NUMBER_SRGB:
++                    pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX;
++                    break;
++                case ADDR_NUMBER_UNORM:
++                    pInfo->numType[c] = ADDR_UNORM_R6XX;
++                    break;
++                case ADDR_NUMBER_SNORM:
++                    pInfo->numType[c] = ADDR_SNORM_R6XX;
++                    break;
++                case ADDR_NUMBER_USCALED:
++                    pInfo->numType[c] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
++                    break;
++                case ADDR_NUMBER_SSCALED:
++                    pInfo->numType[c] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
++                    break;
++                case ADDR_NUMBER_FLOAT:
++                    if (pInfo->compBit[c] == 32)
++                    {
++                        pInfo->numType[c] = ADDR_S8FLOAT32;
++                    }
++                    else if (pInfo->compBit[c] == 16)
++                    {
++                        pInfo->numType[c] = ADDR_S5FLOAT;
++                    }
++                    else if (pInfo->compBit[c] >= 10)
++                    {
++                        pInfo->numType[c] = ADDR_U5FLOAT;
++                    }
++                    else
++                    {
++                        ADDR_ASSERT_ALWAYS();   // float narrower than 10 bits: numType[c] not set
++                    }
++                    break;
++                case ADDR_NUMBER_SINT:
++                    pInfo->numType[c] = ADDR_SINT_BITS;
++                    break;
++                case ADDR_NUMBER_UINT:
++                    pInfo->numType[c] = ADDR_UINT_BITS;
++                    break;
++
++                default:
++                    ADDR_ASSERT(!"Invalid number type");
++                    pInfo->numType[c] = ADDR_NO_NUMBER;
++                    break;
++             }
++        }
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::GetCompSwap
++*
++*   @brief
++*       Get components swapped for color surface
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrElemLib::GetCompSwap(
++    AddrSurfaceSwap         swap,   ///< [in] swap mode
++    ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
++{
++    switch (pInfo->comps)           // the swap pattern depends on the component count
++    {
++        case 4:
++            switch (swap)
++            {
++                case ADDR_SWAP_ALT:
++                    SwapComps( 0, 2, pInfo );
++                    break;    // BGRA
++                case ADDR_SWAP_STD_REV:
++                    SwapComps( 0, 3, pInfo );
++                    SwapComps( 1, 2, pInfo );
++                    break;    // ABGR
++                case ADDR_SWAP_ALT_REV:
++                    SwapComps( 0, 3, pInfo );
++                    SwapComps( 0, 2, pInfo );
++                    SwapComps( 0, 1, pInfo );
++                    break;    // ARGB
++                default:
++                    break;    // no swap: layout stays as the caller set it up
++            }
++            break;
++        case 3:
++            switch (swap)
++            {
++                case ADDR_SWAP_ALT_REV:
++                    SwapComps( 0, 3, pInfo );
++                    SwapComps( 0, 2, pInfo );
++                    break;    // AGR
++                case ADDR_SWAP_STD_REV:
++                    SwapComps( 0, 2, pInfo );
++                    break;    // BGR
++                case ADDR_SWAP_ALT:
++                    SwapComps( 2, 3, pInfo );
++                    break;    // RGA
++                default:
++                    break;    // RGB
++            }
++            break;
++        case 2:
++            switch (swap)
++            {
++                case ADDR_SWAP_ALT_REV:
++                    SwapComps( 0, 1, pInfo );
++                    SwapComps( 1, 3, pInfo );
++                    break;    // AR
++                case ADDR_SWAP_STD_REV:
++                    SwapComps( 0, 1, pInfo );
++                    break;    // GR
++                case ADDR_SWAP_ALT:
++                    SwapComps( 1, 3, pInfo );
++                    break;    // RA
++                default:
++                    break;    // RG
++            }
++            break;
++        case 1:
++            switch (swap)
++            {
++                case ADDR_SWAP_ALT_REV:
++                    SwapComps( 0, 3, pInfo );
++                    break;    // A
++                case ADDR_SWAP_STD_REV:
++                    SwapComps( 0, 2, pInfo );
++                    break;    // B
++                case ADDR_SWAP_ALT:
++                    SwapComps( 0, 1, pInfo );
++                    break;    // G
++                default:
++                    break;    // R
++            }
++            break;
++    }                               // no default: any other component count is left untouched
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::SwapComps
++*
++*   @brief
++*       Exchange the start bit and bit count of components c0 and c1 in pInfo
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrElemLib::SwapComps(
++    UINT_32                 c0,     ///< [in] component index 0
++    UINT_32                 c1,     ///< [in] component index 1
++    ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
++{
++    UINT_32 start;
++    UINT_32 bits;
++
++    start = pInfo->compStart[c0];   // only compStart[] and compBit[] are exchanged here
++    pInfo->compStart[c0] = pInfo->compStart[c1];
++    pInfo->compStart[c1] = start;
++
++    bits  = pInfo->compBit[c0];
++    pInfo->compBit[c0] = pInfo->compBit[c1];
++    pInfo->compBit[c1] = bits;
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::PixGetColorCompInfo
++*
++*   @brief
++*       Get per component info for color surface
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrElemLib::PixGetColorCompInfo(
++    AddrColorFormat         format, ///< [in] surface format, read from register
++    AddrSurfaceNumber       number, ///< [in] pixel number type
++    AddrSurfaceSwap         swap,   ///< [in] component swap mode
++    ADDR_PIXEL_FORMATINFO*  pInfo   ///< [out] output per component info
++    ) const
++{
++    // 1. Get component bits (arguments are the widths of components 0..3, lowest bits first)
++    switch (format)
++    {
++        case ADDR_COLOR_8:
++            GetCompBits(8, 0, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_1_5_5_5:
++            GetCompBits(5, 5, 5, 1, pInfo);
++            break;
++        case ADDR_COLOR_5_6_5:
++            GetCompBits(5, 6, 5, 0, pInfo);     // 5 + 6 + 5 = 16 bits in total
++            break;
++        case ADDR_COLOR_6_5_5:
++            GetCompBits(5, 5, 6, 0, pInfo);
++            break;
++        case ADDR_COLOR_8_8:
++            GetCompBits(8, 8, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_4_4_4_4:
++            GetCompBits(4, 4, 4, 4, pInfo);
++            break;
++        case ADDR_COLOR_16:
++            GetCompBits(16, 0, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_8_8_8_8:
++            GetCompBits(8, 8, 8, 8, pInfo);
++            break;
++        case ADDR_COLOR_2_10_10_10:
++            GetCompBits(10, 10, 10, 2, pInfo);
++            break;
++        case ADDR_COLOR_10_11_11:
++            GetCompBits(11, 11, 10, 0, pInfo);
++            break;
++        case ADDR_COLOR_11_11_10:
++            GetCompBits(10, 11, 11, 0, pInfo);
++            break;
++        case ADDR_COLOR_16_16:
++            GetCompBits(16, 16, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_16_16_16_16:
++            GetCompBits(16, 16, 16, 16, pInfo);
++            break;
++        case ADDR_COLOR_16_FLOAT:
++            GetCompBits(16, 0, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_16_16_FLOAT:
++            GetCompBits(16, 16, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_32_FLOAT:
++            GetCompBits(32, 0, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_32_32_FLOAT:
++            GetCompBits(32, 32, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_16_16_16_16_FLOAT:
++            GetCompBits(16, 16, 16, 16, pInfo);
++            break;
++        case ADDR_COLOR_32_32_32_32_FLOAT:
++            GetCompBits(32, 32, 32, 32, pInfo);
++            break;
++
++        case ADDR_COLOR_32:
++            GetCompBits(32, 0, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_32_32:
++            GetCompBits(32, 32, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_32_32_32_32:
++            GetCompBits(32, 32, 32, 32, pInfo);
++            break;
++        case ADDR_COLOR_10_10_10_2:
++            GetCompBits(2, 10, 10, 10, pInfo);
++            break;
++        case ADDR_COLOR_10_11_11_FLOAT:
++            GetCompBits(11, 11, 10, 0, pInfo);
++            break;
++        case ADDR_COLOR_11_11_10_FLOAT:
++            GetCompBits(10, 11, 11, 0, pInfo);
++            break;
++        case ADDR_COLOR_5_5_5_1:
++            GetCompBits(1, 5, 5, 5, pInfo);
++            break;
++        case ADDR_COLOR_3_3_2:
++            GetCompBits(2, 3, 3, 0, pInfo);
++            break;
++        case ADDR_COLOR_4_4:
++            GetCompBits(4, 4, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_8_24:
++        case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
++            GetCompBits(24, 8, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_24_8:
++        case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
++            GetCompBits(8, 24, 0, 0, pInfo);
++            break;
++        case ADDR_COLOR_X24_8_32_FLOAT:
++            GetCompBits(32, 8, 0, 0, pInfo);
++            break;
++
++        case ADDR_COLOR_INVALID:
++            GetCompBits(0, 0, 0, 0, pInfo);
++            break;
++        default:
++            ADDR_ASSERT(0);                     // unknown format: report zero components
++            GetCompBits(0, 0, 0, 0, pInfo);
++            break;
++    }
++
++    // 2. Get component number type
++
++    GetCompType(format, number, pInfo);
++
++    // 3. Swap components if needed
++
++    GetCompSwap(swap, pInfo);
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::PixGetDepthCompInfo
++*
++*   @brief
++*       Get per component info for depth surface
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrElemLib::PixGetDepthCompInfo(
++    AddrDepthFormat         format,     ///< [in] surface format, read from register
++    ADDR_PIXEL_FORMATINFO*  pInfo       ///< [out] output per component bits and type
++    ) const
++{
++    if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)   // remap the 24-bit float formats first
++    {
++        if (format == ADDR_DEPTH_8_24_FLOAT)
++        {
++            format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
++        }
++
++        if (format == ADDR_DEPTH_X8_24_FLOAT)
++        {
++            format = ADDR_DEPTH_32_FLOAT;       // handled below as a plain 32-bit float depth
++        }
++    }
++
++    switch (format)     // 1. bit widths of component 0 and component 1
++    {
++        case ADDR_DEPTH_16:
++            GetCompBits(16, 0, 0, 0, pInfo);
++            break;
++        case ADDR_DEPTH_8_24:
++        case ADDR_DEPTH_8_24_FLOAT:      // similar format, fall through
++            GetCompBits(24, 8, 0, 0, pInfo);
++            break;
++        case ADDR_DEPTH_X8_24:
++        case ADDR_DEPTH_X8_24_FLOAT:     // similar format, fall through
++            GetCompBits(24, 0, 0, 0, pInfo);
++            break;
++        case ADDR_DEPTH_32_FLOAT:
++            GetCompBits(32, 0, 0, 0, pInfo);
++            break;
++        case ADDR_DEPTH_X24_8_32_FLOAT:
++            GetCompBits(32, 8, 0, 0, pInfo);
++            break;
++        case ADDR_DEPTH_INVALID:
++            GetCompBits(0, 0, 0, 0, pInfo);
++            break;
++        default:
++            ADDR_ASSERT(0);
++            GetCompBits(0, 0, 0, 0, pInfo);
++            break;
++    }
++
++    switch (format)     // 2. number types of component 0 and component 1
++    {
++        case ADDR_DEPTH_16:
++            pInfo->numType [0] = ADDR_UNORM_R6XX;
++            pInfo->numType [1] = ADDR_ZERO;
++            break;
++        case ADDR_DEPTH_8_24:
++            pInfo->numType [0] = ADDR_UNORM_R6XXDB;
++            pInfo->numType [1] = ADDR_UINT_BITS;
++            break;
++        case ADDR_DEPTH_8_24_FLOAT:
++            pInfo->numType [0] = ADDR_U4FLOATC;
++            pInfo->numType [1] = ADDR_UINT_BITS;
++            break;
++        case ADDR_DEPTH_X8_24:
++            pInfo->numType [0] = ADDR_UNORM_R6XXDB;
++            pInfo->numType [1] = ADDR_ZERO;
++            break;
++        case ADDR_DEPTH_X8_24_FLOAT:
++            pInfo->numType [0] = ADDR_U4FLOATC;
++            pInfo->numType [1] = ADDR_ZERO;
++            break;
++        case ADDR_DEPTH_32_FLOAT:
++            pInfo->numType [0] = ADDR_S8FLOAT32;
++            pInfo->numType [1] = ADDR_ZERO;
++            break;
++        case ADDR_DEPTH_X24_8_32_FLOAT:
++            pInfo->numType [0] = ADDR_S8FLOAT32;
++            pInfo->numType [1] = ADDR_UINT_BITS;
++            break;
++        default:
++            pInfo->numType [0] = ADDR_NO_NUMBER;
++            pInfo->numType [1] = ADDR_NO_NUMBER;
++            break;
++    }
++
++    pInfo->numType [2] = ADDR_NO_NUMBER;    // components 2 and 3 always get zero bits above
++    pInfo->numType [3] = ADDR_NO_NUMBER;
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::PixGetExportNorm
++*
++*   @brief
++*       Check if fp16 export norm can be enabled.
++*
++*   @return
++*       TRUE if this can be enabled.
++*
++***************************************************************************************************
++*/
++BOOL_32 AddrElemLib::PixGetExportNorm(
++    AddrColorFormat     colorFmt,       ///< [in] surface format, read from register
++    AddrSurfaceNumber   numberFmt,      ///< [in] pixel number type
++    AddrSurfaceSwap     swap            ///< [in] components swap type
++    ) const
++{
++    // Decode the format into per-component bit widths and number types; the loop
++    // below inspects only compBit[] and numType[].
++    ADDR_PIXEL_FORMATINFO info;
++
++    PixGetColorCompInfo(colorFmt, numberFmt, swap, &info);
++
++    // Export norm stays enabled only if none of the four components disqualifies it.
++    for (UINT_32 comp = 0; comp < 4; comp++)
++    {
++        // Components of at most 11 bits whose number type does not sort after
++        // ADDR_USCALED are always acceptable.
++        if ((info.compBit[comp] <= 11) && (info.numType[comp] <= ADDR_USCALED))
++        {
++            continue;
++        }
++
++        // With m_fp16ExportNorm set, the float number types below are tolerated as well.
++        if (m_fp16ExportNorm &&
++            ((info.numType[comp] == ADDR_U4FLOATC) ||
++             (info.numType[comp] == ADDR_S5FLOAT)  ||
++             (info.numType[comp] == ADDR_S5FLOATM) ||
++             (info.numType[comp] == ADDR_U5FLOAT)  ||
++             (info.numType[comp] == ADDR_U3FLOATM)))
++        {
++            continue;
++        }
++
++        // Anything else rules export norm out; later components need no inspection.
++        return FALSE;
++    }
++
++    return TRUE;
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::AdjustSurfaceInfo
++*
++*   @brief
++*       Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::AdjustSurfaceInfo(
++    AddrElemMode    elemMode,       ///< [in] element mode
++    UINT_32         expandX,        ///< [in] decompression expansion factor in X
++    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
++    UINT_32*        pBpp,           ///< [in/out] bpp
++    UINT_32*        pBasePitch,     ///< [in/out] base pitch
++    UINT_32*        pWidth,         ///< [in/out] width
++    UINT_32*        pHeight)        ///< [in/out] height
++{
++    UINT_32 packedBits;
++    UINT_32 basePitch;
++    UINT_32 width;
++    UINT_32 height;
++    UINT_32 bpp;
++    BOOL_32 bBCnFormat = FALSE;     // set in the bpp switch, read by the R8XX workaround below
++
++    ADDR_ASSERT(pBpp != NULL);
++    ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);
++
++    if (pBpp)
++    {
++        bpp = *pBpp;
++
++        switch (elemMode)
++        {
++            case ADDR_EXPANDED:
++                packedBits = bpp / expandX / expandY;   // expandX and expandY must be non-zero
++                break;
++            case ADDR_PACKED_STD: // Different bit order
++            case ADDR_PACKED_REV:
++                packedBits = bpp * expandX * expandY;
++                break;
++            case ADDR_PACKED_GBGR:
++            case ADDR_PACKED_BGRG:
++                packedBits = bpp; // 32-bit packed ==> 2 32-bit result
++                break;
++            case ADDR_PACKED_BC1: // Fall through
++            case ADDR_PACKED_BC4:
++                packedBits = 64;
++                bBCnFormat = TRUE;
++                break;
++            case ADDR_PACKED_BC2: // Fall through
++            case ADDR_PACKED_BC3: // Fall through
++            case ADDR_PACKED_BC5:
++                bBCnFormat = TRUE;
++                packedBits = 128;
++                break;
++            case ADDR_ROUND_BY_HALF:  // Fall through
++            case ADDR_ROUND_TRUNCATE: // Fall through
++            case ADDR_ROUND_DITHER:   // Fall through
++            case ADDR_UNCOMPRESSED:
++                packedBits = bpp;
++                break;
++            default:
++                packedBits = bpp;
++                ADDR_ASSERT_ALWAYS();
++                break;
++        }
++
++        *pBpp = packedBits;
++    }
++
++    if (pWidth && pHeight && pBasePitch)
++    {
++        basePitch = *pBasePitch;
++        width     = *pWidth;
++        height    = *pHeight;
++
++        if ((expandX > 1) || (expandY > 1))     // outputs are rewritten only inside this branch
++        {
++            if (elemMode == ADDR_EXPANDED)
++            {
++                basePitch *= expandX;
++                width     *= expandX;
++                height    *= expandY;
++            }
++            else
++            {
++                // Evergreen family workaround
++                if (bBCnFormat && (m_pAddrLib->GetAddrChipFamily() == ADDR_CHIP_FAMILY_R8XX))
++                {
++                    // For BCn we now pad it to POW2 at the beginning so it is safe to
++                    // divide by 4 directly
++                    basePitch = basePitch / expandX;
++                    width     = width  / expandX;
++                    height    = height / expandY;
++#if DEBUG
++                    width     = (width == 0) ? 1 : width;
++                    height    = (height == 0) ? 1 : height;
++
++                    if ((*pWidth > PowTwoAlign(width, 8) * expandX) ||
++                        (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment
++                    {
++                        // if this assertion is hit we may have issues if app samples
++                        // rightmost/bottommost pixels
++                        ADDR_ASSERT_ALWAYS();
++                    }
++#endif
++                }
++                else // Not BCn format we still keep old way (FMT_1? No real test yet)
++                {
++                    basePitch = (basePitch + expandX - 1) / expandX;    // round up
++                    width     = (width + expandX - 1) / expandX;
++                    height    = (height + expandY - 1) / expandY;
++                }
++            }
++
++            *pBasePitch = basePitch; // 0 is legal value for base pitch.
++            *pWidth     = (width == 0) ? 1 : width;
++            *pHeight    = (height == 0) ? 1 : height;
++        } // if ((expandX > 1) || (expandY > 1))
++    } // if (pWidth && pHeight && pBasePitch)
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::RestoreSurfaceInfo
++*
++*   @brief
++*       Reverse operation of AdjustSurfaceInfo
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::RestoreSurfaceInfo(
++    AddrElemMode    elemMode,       ///< [in] element mode
++    UINT_32         expandX,        ///< [in] decompression expansion factor in X
++    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
++    UINT_32*        pBpp,           ///< [in/out] bpp
++    UINT_32*        pWidth,         ///< [in/out] width
++    UINT_32*        pHeight)        ///< [in/out] height
++{
++    UINT_32 originalBits;
++    UINT_32 width;
++    UINT_32 height;
++    UINT_32 bpp;
++
++    BOOL_32 bBCnFormat = FALSE;     // assigned in the switch below but never read in this function
++
++    ADDR_ASSERT(pBpp != NULL);
++    ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
++
++    if (pBpp)
++    {
++        bpp = *pBpp;
++
++        switch (elemMode)
++        {
++        case ADDR_EXPANDED:
++            originalBits = bpp * expandX * expandY;
++            break;
++        case ADDR_PACKED_STD: // Different bit order
++        case ADDR_PACKED_REV:
++            originalBits = bpp / expandX / expandY;     // expandX and expandY must be non-zero
++            break;
++        case ADDR_PACKED_GBGR:
++        case ADDR_PACKED_BGRG:
++            originalBits = bpp; // 32-bit packed ==> 2 32-bit result
++            break;
++        case ADDR_PACKED_BC1: // Fall through
++        case ADDR_PACKED_BC4:
++            originalBits = 64;
++            bBCnFormat = TRUE;
++            break;
++        case ADDR_PACKED_BC2: // Fall through
++        case ADDR_PACKED_BC3: // Fall through
++            case ADDR_PACKED_BC5:
++            bBCnFormat = TRUE;
++            originalBits = 128;
++            break;
++        case ADDR_ROUND_BY_HALF:  // Fall through
++        case ADDR_ROUND_TRUNCATE: // Fall through
++        case ADDR_ROUND_DITHER:   // Fall through
++        case ADDR_UNCOMPRESSED:
++            originalBits = bpp;
++            break;
++        default:
++            originalBits = bpp;
++            ADDR_ASSERT_ALWAYS();
++            break;
++        }
++
++        *pBpp = originalBits;
++    }
++
++    if (pWidth && pHeight)
++    {
++        width    = *pWidth;
++        height   = *pHeight;
++
++        if ((expandX > 1) || (expandY > 1))
++        {
++            if (elemMode == ADDR_EXPANDED)
++            {
++                width /= expandX;
++                height /= expandY;
++            }
++            else
++            {
++                width *= expandX;
++                height *= expandY;
++            }
++        }
++
++        *pWidth  = (width == 0) ? 1 : width;    // written back even when nothing was scaled
++        *pHeight = (height == 0) ? 1 : height;
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::GetBitsPerPixel
++*
++*   @brief
++*       Compute the total bits per element according to a format
++*       code. For compressed formats, this is not the same as
++*       the number of bits per decompressed element.
++*
++*   @return
++*       Bits per pixel
++***************************************************************************************************
++*/
++UINT_32 AddrElemLib::GetBitsPerPixel(
++    AddrFormat          format,         ///< [in] surface format code
++    AddrElemMode*       pElemMode,      ///< [out] element mode
++    UINT_32*            pExpandX,       ///< [out] decompression expansion factor in X
++    UINT_32*            pExpandY,       ///< [out] decompression expansion factor in Y
++    UINT_32*            pUnusedBits)    ///< [out] bits unused
++{
++    UINT_32 bpp;
++    UINT_32 expandX = 1;
++    UINT_32 expandY = 1;
++    UINT_32 bitUnused = 0;
++    AddrElemMode elemMode = ADDR_UNCOMPRESSED; // default value
++
++    switch (format)
++    {
++        case ADDR_FMT_8:
++            bpp = 8;
++            break;
++        case ADDR_FMT_1_5_5_5:
++        case ADDR_FMT_5_6_5:
++        case ADDR_FMT_6_5_5:
++        case ADDR_FMT_8_8:
++        case ADDR_FMT_4_4_4_4:
++        case ADDR_FMT_16:
++        case ADDR_FMT_16_FLOAT:
++            bpp = 16;
++            break;
++        case ADDR_FMT_GB_GR: // treat as FMT_8_8
++            elemMode = ADDR_PACKED_GBGR;
++            bpp = 16;
++            break;
++        case ADDR_FMT_BG_RG: // treat as FMT_8_8
++            elemMode = ADDR_PACKED_BGRG;
++            bpp = 16;
++            break;
++        case ADDR_FMT_8_8_8_8:
++        case ADDR_FMT_2_10_10_10:
++        case ADDR_FMT_10_11_11:
++        case ADDR_FMT_11_11_10:
++        case ADDR_FMT_16_16:
++        case ADDR_FMT_16_16_FLOAT:
++        case ADDR_FMT_32:
++        case ADDR_FMT_32_FLOAT:
++        case ADDR_FMT_24_8:
++        case ADDR_FMT_24_8_FLOAT:
++            bpp = 32;
++            break;
++        case ADDR_FMT_16_16_16_16:
++        case ADDR_FMT_16_16_16_16_FLOAT:
++        case ADDR_FMT_32_32:
++        case ADDR_FMT_32_32_FLOAT:
++        case ADDR_FMT_CTX1:
++            bpp = 64;
++            break;
++        case ADDR_FMT_32_32_32_32:
++        case ADDR_FMT_32_32_32_32_FLOAT:
++            bpp = 128;
++            break;
++        case ADDR_FMT_INVALID:
++            bpp = 0;
++            break;
++        case ADDR_FMT_1_REVERSED:
++            elemMode = ADDR_PACKED_REV;
++            expandX = 8;
++            bpp = 1;
++            break;
++        case ADDR_FMT_1:
++            elemMode = ADDR_PACKED_STD;
++            expandX = 8;
++            bpp = 1;
++            break;
++        case ADDR_FMT_4_4:
++        case ADDR_FMT_3_3_2:
++            bpp = 8;
++            break;
++        case ADDR_FMT_5_5_5_1:
++            bpp = 16;
++            break;
++        case ADDR_FMT_32_AS_8:
++        case ADDR_FMT_32_AS_8_8:
++        case ADDR_FMT_8_24:
++        case ADDR_FMT_8_24_FLOAT:
++        case ADDR_FMT_10_10_10_2:
++        case ADDR_FMT_10_11_11_FLOAT:
++        case ADDR_FMT_11_11_10_FLOAT:
++        case ADDR_FMT_5_9_9_9_SHAREDEXP:
++            bpp = 32;
++            break;
++        case ADDR_FMT_X24_8_32_FLOAT:
++            bpp = 64;
++            bitUnused = 24;
++            break;
++        case ADDR_FMT_8_8_8:
++            elemMode = ADDR_EXPANDED;
++            bpp = 24;//@@ 8;      // read 3 elements per pixel
++            expandX = 3;
++            break;
++        case ADDR_FMT_16_16_16:
++        case ADDR_FMT_16_16_16_FLOAT:
++            elemMode = ADDR_EXPANDED;
++            bpp = 48;//@@ 16;      // read 3 elements per pixel
++            expandX = 3;
++            break;
++        case ADDR_FMT_32_32_32_FLOAT:
++        case ADDR_FMT_32_32_32:
++            elemMode = ADDR_EXPANDED;
++            expandX = 3;
++            bpp = 96;//@@ 32;      // read 3 elements per pixel
++            break;
++        case ADDR_FMT_BC1:
++            elemMode = ADDR_PACKED_BC1;
++            expandX = 4;
++            expandY = 4;
++            bpp = 64;
++            break;
++        case ADDR_FMT_BC4:
++            elemMode = ADDR_PACKED_BC4;
++            expandX = 4;
++            expandY = 4;
++            bpp = 64;
++            break;
++        case ADDR_FMT_BC2:
++            elemMode = ADDR_PACKED_BC2;
++            expandX = 4;
++            expandY = 4;
++            bpp = 128;
++            break;
++        case ADDR_FMT_BC3:
++            elemMode = ADDR_PACKED_BC3;
++            expandX = 4;
++            expandY = 4;
++            bpp = 128;
++            break;
++        case ADDR_FMT_BC5:
++        case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
++        case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
++            elemMode = ADDR_PACKED_BC5;
++            expandX = 4;
++            expandY = 4;
++            bpp = 128;
++            break;
++        default:
++            bpp = 0;
++            ADDR_ASSERT_ALWAYS();
++            break;
++            // @@ or should this be an error?
++    }
++    // NOTE(review): pElemMode is written through a UINT_32*; assumes AddrElemMode is 32 bits wide.
++    SafeAssign(pExpandX, expandX);
++    SafeAssign(pExpandY, expandY);
++    SafeAssign(pUnusedBits, bitUnused);
++    SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode);
++
++    return bpp;
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::GetCompBits
++*
++*   @brief
++*       Set each component's bit size and bit start. And set element mode and number type
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::GetCompBits(
++    UINT_32 c0,                     ///< [in] bits of component 0
++    UINT_32 c1,                     ///< [in] bits of component 1
++    UINT_32 c2,                     ///< [in] bits of component 2
++    UINT_32 c3,                     ///< [in] bits of component 3
++    ADDR_PIXEL_FORMATINFO* pInfo,   ///< [out] per component info out
++    AddrElemMode elemMode)          ///< [in] element mode
++{
++    // Components are packed back to back starting at bit 0, in index order.
++    const UINT_32 widths[4] = { c0, c1, c2, c3 };
++    UINT_32       nextStart = 0;
++
++    pInfo->elemMode = elemMode;
++    pInfo->comps    = 0;
++
++    for (INT idx = 0; idx < 4; idx++)
++    {
++        pInfo->compBit[idx] = widths[idx];
++
++        if (pInfo->compBit[idx] != 0)
++        {
++            // Populated component: record where it starts and count it, since the
++            // component swap may depend on the number of components.
++            pInfo->compStart[idx] = nextStart;
++            pInfo->comps++;
++        }
++        else
++        {
++            pInfo->compStart[idx] = 0;              // all null components start at bit 0
++            pInfo->numType[idx]   = ADDR_NO_NUMBER; // and have no number type
++        }
++
++        nextStart += widths[idx];
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::SetClearComps
++*
++*   @brief
++*       Set the clear color (or clear depth/stencil) for a surface
++*
++*   @note
++*       If clearColor is FALSE, comps[0..3] are overwritten with the defaults 0, 0, 0, 1.
++*       Else, unless float32 is set, NaNs become 0xFFC00000 and the low 12 mantissa bits are cleared
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrElemLib::SetClearComps(
++    ADDR_FLT_32 comps[4],   ///< [in/out] components
++    BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
++    BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
++{
++    INT_32 i;
++
++    // Use default clearvalues if clearColor is disabled
++    if (clearColor == FALSE)
++    {
++        for (i=0; i<3; i++)
++        {
++            comps[i].f = 0.0;
++        }
++        comps[3].f = 1.0;
++    }
++
++    // Otherwise use the (modified) clear value
++    else
++    {
++        for (i=0; i<4; i++)
++        {   // If full precision, use clear value unchanged
++            if (float32)
++            {
++                // Do nothing
++                //comps[i] = comps[i];
++            }
++            // Else if it is a NaN, use the standard NaN value
++            else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000)
++            {
++                comps[i].u = 0xFFC00000;
++            }
++            // Else reduce the mantissa precision
++            else
++            {
++                comps[i].u = comps[i].u & 0xFFFFF000;   // clears the 12 least significant bits
++            }
++        }
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::IsBlockCompressed
++*
++*   @brief
++*       TRUE if this is block compressed format
++*
++*   @note
++*
++*   @return
++*       BOOL_32
++***************************************************************************************************
++*/
++BOOL_32 AddrElemLib::IsBlockCompressed(
++    AddrFormat format)  ///< [in] Format
++{
++    return (ADDR_FMT_BC1 <= format) && (format <= ADDR_FMT_BC7);
++}
++
++
++/**
++***************************************************************************************************
++*   AddrElemLib::IsCompressed
++*
++*   @brief
++*       TRUE if this is block compressed format or 1 bit format
++*
++*   @note
++*       NOTE(review): only BC formats match as written; no 1 bit format is tested - confirm intent.
++*   @return
++*       BOOL_32
++***************************************************************************************************
++*/
++BOOL_32 AddrElemLib::IsCompressed(
++    AddrFormat format)  ///< [in] Format
++{
++    return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7;
++}
++
++/**
++***************************************************************************************************
++*   AddrElemLib::IsExpand3x
++*
++*   @brief
++*       TRUE if this is 3x expand format
++*
++*   @note
++*
++*   @return
++*       BOOL_32
++***************************************************************************************************
++*/
++BOOL_32 AddrElemLib::IsExpand3x(
++    AddrFormat format)  ///< [in] Format
++{
++    // The formats listed below are the only ones reported as 3x expanded;
++    // every other format yields FALSE.
++    switch (format)
++    {
++        case ADDR_FMT_8_8_8:
++        case ADDR_FMT_16_16_16:
++        case ADDR_FMT_16_16_16_FLOAT:
++        case ADDR_FMT_32_32_32:
++        case ADDR_FMT_32_32_32_FLOAT:
++            return TRUE;
++
++        default:
++            break;
++    }
++
++    return FALSE;
++}
++
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.h b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.h
+new file mode 100644
+index 0000000..c302b3b
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrelemlib.h
+@@ -0,0 +1,270 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrelemlib.h
++* @brief Contains the class for element/pixel related functions
++***************************************************************************************************
++*/
++
++#ifndef __ELEM_LIB_H__
++#define __ELEM_LIB_H__
++
++#include "addrinterface.h"
++#include "addrobject.h"
++#include "addrcommon.h"
++
++class AddrLib;
++
++// The masks for property bits within the Properties INT_32
++union ADDR_COMPONENT_FLAGS
++{
++    struct                             ///< individual one-bit property flags
++    {
++        UINT_32 byteAligned    : 1;    ///< all components are byte aligned
++        UINT_32 exportNorm     : 1;    ///< components support R6xx NORM compression
++        UINT_32 floatComp      : 1;    ///< there is at least one floating point component
++    };
++
++    UINT_32 value;                     ///< the same flags accessed as a single UINT_32
++};
++
++// Copy from legacy lib's AddrNumberType
++enum AddrNumberType
++{
++    // The following number types have the range [-1..1]
++    ADDR_NO_NUMBER,         // This component doesn't exist and has no default value
++    ADDR_EPSILON,           // Force component value to integer 0x00000001
++    ADDR_ZERO,              // Force component value to integer 0x00000000
++    ADDR_ONE,               // Force component value to floating point 1.0
++    // Above values don't have any bits per component (keep ADDR_ONE the last of these)
++
++    ADDR_UNORM,             // Unsigned normalized (repeating fraction) full precision
++    ADDR_SNORM,             // Signed normalized (repeating fraction) full precision
++    ADDR_GAMMA,             // Gamma-corrected, full precision
++
++    ADDR_UNORM_R5XXRB,      // Unsigned normalized (repeating fraction) for r5xx RB
++    ADDR_SNORM_R5XXRB,      // Signed normalized (repeating fraction) for r5xx RB
++    ADDR_GAMMA_R5XXRB,      // Gamma-corrected for r5xx RB (note: unnormalized value)
++    ADDR_UNORM_R5XXBC,      // Unsigned normalized (repeating fraction) for r5xx BC
++    ADDR_SNORM_R5XXBC,      // Signed normalized (repeating fraction) for r5xx BC
++    ADDR_GAMMA_R5XXBC,      // Gamma-corrected for r5xx BC (note: unnormalized value)
++
++    ADDR_UNORM_R6XX,        // Unsigned normalized (repeating fraction) for R6xx
++    ADDR_UNORM_R6XXDB,      // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX
++    ADDR_SNORM_R6XX,        // Signed normalized (repeating fraction) for R6xx
++    ADDR_GAMMA8_R6XX,       // Gamma-corrected for r6xx
++    ADDR_GAMMA8_R7XX_TP,    // Gamma-corrected for r7xx TP 12bit unorm 8.4.
++
++    ADDR_U4FLOATC,          // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1]
++    ADDR_GAMMA_4SEG,        // Gamma-corrected, four segment approximation
++    ADDR_U0FIXED,           // Unsigned 0.N-bit fixed point
++
++    // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine)
++    ADDR_USCALED,           // Unsigned integer converted to/from floating point
++    ADDR_SSCALED,           // Signed integer converted to/from floating point
++    ADDR_USCALED_R5XXRB,    // Unsigned integer to/from floating point for r5xx RB
++    ADDR_SSCALED_R5XXRB,    // Signed integer to/from floating point for r5xx RB
++    ADDR_UINT_BITS,         // Keep in unsigned integer form, clamped to specified range
++    ADDR_SINT_BITS,         // Keep in signed integer form, clamped to specified range
++    ADDR_UINTBITS,          // @@ remove Keep in unsigned integer form, use modulus to reduce bits
++    ADDR_SINTBITS,          // @@ remove Keep in signed integer form, use modulus to reduce bits
++
++    // The following number types and ADDR_U4FLOATC have exponents
++    // (LEAVE ADDR_S8FLOAT first or fix Finish routine)
++    ADDR_S8FLOAT,           // Signed floating point with 8-bit exponent, bias=127
++    ADDR_S8FLOAT32,         // 32-bit IEEE float, passes through NaN values
++    ADDR_S5FLOAT,           // Signed floating point with 5-bit exponent, bias=15
++    ADDR_S5FLOATM,          // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf
++    ADDR_U5FLOAT,           // Unsigned floating point with 5-bit exponent, bias=15
++    ADDR_U3FLOATM,          // Unsigned floating point with 3-bit exponent, bias=3
++
++    ADDR_S5FIXED,           // Signed 5.N-bit fixed point, with rounding
++
++    ADDR_END_NUMBER         // Used for range comparisons
++};
++
++// Copy from legacy lib's AddrElement
++enum AddrElemMode
++{
++    // These formats allow both packing and unpacking
++    ADDR_ROUND_BY_HALF,     // add 1/2 and truncate when packing this element
++    ADDR_ROUND_TRUNCATE,    // truncate toward 0 for sign/mag, else toward neg
++    ADDR_ROUND_DITHER,      // Pack by dithering -- requires (x,y) position
++
++    // These formats only allow unpacking, no packing
++    ADDR_UNCOMPRESSED,      // Elements are not compressed: one data element per pixel/texel
++    ADDR_EXPANDED,          // Elements are split up and stored in multiple data elements
++    ADDR_PACKED_STD,        // Elements are compressed into ExpandX by ExpandY data elements
++    ADDR_PACKED_REV,        // Like ADDR_PACKED_STD, but X order of pixels is reversed
++    ADDR_PACKED_GBGR,       // Elements are compressed 4:2:2 in G1B_G0R order (high to low)
++    ADDR_PACKED_BGRG,       // Elements are compressed 4:2:2 in BG1_RG0 order (high to low)
++    ADDR_PACKED_BC1,        // Each data element is uncompressed to a 4x4 pixel/texel array
++    ADDR_PACKED_BC2,        // Each data element is uncompressed to a 4x4 pixel/texel array
++    ADDR_PACKED_BC3,        // Each data element is uncompressed to a 4x4 pixel/texel array
++    ADDR_PACKED_BC4,        // Each data element is uncompressed to a 4x4 pixel/texel array
++    ADDR_PACKED_BC5,        // Each data element is uncompressed to a 4x4 pixel/texel array
++
++    // These formats provide various kinds of compression
++    ADDR_ZPLANE_R5XX,       // Compressed Zplane using r5xx architecture format
++    ADDR_ZPLANE_R6XX,       // Compressed Zplane using r6xx architecture format
++    //@@ Fill in the compression modes
++
++    ADDR_END_ELEMENT        // Used for range comparisons
++};
++
++enum AddrDepthPlanarType
++{
++    ADDR_DEPTH_PLANAR_NONE = 0, // No planar z/stencil
++    ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are stored within a tile
++    ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes
++};
++
++/**
++***************************************************************************************************
++*   ADDR_PIXEL_FORMATINFO
++*
++*   @brief
++*       Per component info
++*
++***************************************************************************************************
++*/
++struct ADDR_PIXEL_FORMATINFO
++{
++    UINT_32             compBit[4];     ///< Per component; presumably its bit count (TODO confirm)
++    AddrNumberType      numType[4];     ///< Number type of each component
++    UINT_32             compStart[4];   ///< Per component; presumably its start bit (TODO confirm)
++    AddrElemMode        elemMode;       ///< Element mode (see AddrElemMode)
++    UINT_32             comps;          ///< Number of components
++};
++
++/**
++***************************************************************************************************
++* @brief This class contains asic independent element related attributes and operations
++***************************************************************************************************
++*/
++class AddrElemLib : public AddrObject
++{
++protected:
++    AddrElemLib(AddrLib* const pAddrLib);
++
++public:
++
++    /// Makes this class virtual
++    virtual ~AddrElemLib();
++
++    static AddrElemLib *Create(
++        const AddrLib* const pAddrLib);
++
++    /// The implementation is only for R6xx/R7xx; NOTE(review): it is not declared virtual here
++    BOOL_32 PixGetExportNorm(
++        AddrColorFormat colorFmt,
++        AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const;
++
++    /// Methods below are asic independent; note that only some of them are declared static.
++    /// Remove static if we need different operation in hwl.
++
++    VOID    Flt32ToDepthPixel(
++        AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const;
++
++    VOID    Flt32ToColorPixel(
++        AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap,
++        const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const;
++
++    static VOID    Flt32sToInt32s(
++        ADDR_FLT_32 value, UINT_32 bits, AddrNumberType numberType, UINT_32* pResult);
++
++    static VOID    Int32sToPixel(
++        UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart,
++        ADDR_COMPONENT_FLAGS properties, UINT_32 resultBits, UINT_8* pPixel);
++
++    VOID    PixGetColorCompInfo(
++        AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap,
++        ADDR_PIXEL_FORMATINFO* pInfo) const;
++
++    VOID    PixGetDepthCompInfo(
++        AddrDepthFormat format, ADDR_PIXEL_FORMATINFO* pInfo) const;
++
++    UINT_32 GetBitsPerPixel(
++        AddrFormat format, AddrElemMode* pElemMode,
++        UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL);
++
++    static VOID    SetClearComps(
++        ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32);
++
++    VOID    AdjustSurfaceInfo(
++        AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
++        UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight);
++
++    VOID    RestoreSurfaceInfo(
++        AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
++        UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight);
++
++    /// Checks if depth and stencil are planar inside a tile (TRUE only for the R600 type)
++    BOOL_32 IsDepthStencilTilePlanar()
++    {
++        return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? TRUE : FALSE;
++    }
++
++    /// Sets m_configFlags, copied from AddrLib
++    VOID    SetConfigFlags(ADDR_CONFIG_FLAGS flags)
++    {
++        m_configFlags = flags;
++    }
++
++    static BOOL_32 IsCompressed(AddrFormat format);
++    static BOOL_32 IsBlockCompressed(AddrFormat format);
++    static BOOL_32 IsExpand3x(AddrFormat format);
++
++protected:
++
++    static VOID    GetCompBits(
++        UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3,
++        ADDR_PIXEL_FORMATINFO* pInfo,
++        AddrElemMode elemMode = ADDR_ROUND_BY_HALF);
++
++    static VOID    GetCompType(
++        AddrColorFormat format, AddrSurfaceNumber numType,
++        ADDR_PIXEL_FORMATINFO* pInfo);
++
++    static VOID    GetCompSwap(
++        AddrSurfaceSwap swap, ADDR_PIXEL_FORMATINFO* pInfo);
++
++    static VOID    SwapComps(
++        UINT_32 c0, UINT_32 c1, ADDR_PIXEL_FORMATINFO* pInfo);
++
++private:
++
++    UINT_32             m_fp16ExportNorm;   ///< If allow FP16 to be reported as EXPORT_NORM
++    AddrDepthPlanarType m_depthPlanarType;  ///< Read by IsDepthStencilTilePlanar()
++
++    ADDR_CONFIG_FLAGS   m_configFlags;      ///< Copy of AddrLib's configFlags
++    AddrLib* const      m_pAddrLib;         ///< Pointer to parent addrlib instance
++};
++
++#endif
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.cpp
+new file mode 100644
+index 0000000..51b1eab
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.cpp
+@@ -0,0 +1,4028 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrlib.cpp
++* @brief Contains the implementation for the AddrLib base class.
++***************************************************************************************************
++*/
++
++#include "addrinterface.h"
++#include "addrlib.h"
++#include "addrcommon.h"
++
++#if defined(__APPLE__)
++
++UINT_32 div64_32(UINT_64 n, UINT_32 base) // returns the remainder (rem), not the quotient
++{
++    UINT_64 rem = n;
++    UINT_64 b = base;
++    UINT_64 res, d = 1;
++    UINT_32 high = rem >> 32; // upper 32 bits of the dividend
++
++    res = 0;
++    if (high >= base) // base is used as a divisor just below, so it must be non-zero
++    {
++        high /= base;
++        res = (UINT_64) high << 32;
++        rem -= (UINT_64) (high*base) << 32; // remove the whole multiples of base from the upper word
++    }
++
++    while ((INT_64)b > 0 && b < rem) // double b and d until b reaches rem or b's top bit is set
++    {
++        b = b+b;
++        d = d+d;
++    }
++
++    do // shift-and-subtract: b and d are halved each pass until d becomes 0
++    {
++        if (rem >= b)
++        {
++            rem -= b;
++            res += d;
++        }
++        b >>= 1;
++        d >>= 1;
++    } while (d);
++
++    n = res; // NOTE(review): n is a by-value parameter, so the quotient in res is discarded
++    return rem;
++}
++
++extern "C" UINT_32 __umoddi3(UINT_64 n, UINT_32 base)
++{
++    const UINT_32 rem32 = div64_32(n, base); // div64_32 hands back the remainder
++    return rem32;
++}
++
++#endif // __APPLE__
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Static Const Member
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++const AddrTileModeFlags AddrLib::m_modeFlags[ADDR_TM_COUNT] = // one row per tile mode named at right
++{// T   L  1  2  3  P  Pr B   <- presumably T=thickness L=linear 1/2/3=1D/2D/3D P=PRT B=2B/3B (confirm)
++    {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL
++    {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED
++    {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1
++    {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK
++    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1
++    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2
++    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4
++    {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK
++    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1
++    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2
++    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4
++    {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK
++    {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1
++    {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK
++    {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1
++    {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK
++    {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK
++    {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK
++    {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE
++    {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1
++    {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1
++    {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1
++    {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK
++    {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK
++    {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK
++};
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Constructor/Destructor
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrLib::AddrLib
++*
++*   @brief
++*       Constructor for the AddrLib class
++*
++***************************************************************************************************
++*/
++AddrLib::AddrLib() :
++    m_class(BASE_ADDRLIB),
++    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),    // placeholder until SetAddrChipFamily() runs
++    m_chipRevision(0),
++    m_version(ADDRLIB_VERSION),
++    m_pipes(0),
++    m_banks(0),
++    m_pipeInterleaveBytes(0),
++    m_rowSize(0),
++    m_minPitchAlignPixels(1),               // same value SetMinPitchAlignPixels() uses for 0
++    m_maxSamples(8),
++    m_pElemLib(NULL)                        // assigned later in AddrLib::Create()
++{
++    m_configFlags.value = 0;                // presumably clears every config flag (confirm)
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::AddrLib
++*
++*   @brief
++*       Constructor for the AddrLib class with a client descriptor (pClient) as parameter
++*
++***************************************************************************************************
++*/
++AddrLib::AddrLib(const AddrClient* pClient) :
++    AddrObject(pClient),                    // client descriptor is handed to the base class
++    m_class(BASE_ADDRLIB),
++    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),    // placeholder until SetAddrChipFamily() runs
++    m_chipRevision(0),
++    m_version(ADDRLIB_VERSION),
++    m_pipes(0),
++    m_banks(0),
++    m_pipeInterleaveBytes(0),
++    m_rowSize(0),
++    m_minPitchAlignPixels(1),               // same value SetMinPitchAlignPixels() uses for 0
++    m_maxSamples(8),
++    m_pElemLib(NULL)                        // assigned later in AddrLib::Create()
++{
++    m_configFlags.value = 0;                // presumably clears every config flag (confirm)
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::~AddrLib
++*
++*   @brief
++*       Destructor for the AddrLib class
++*
++***************************************************************************************************
++*/
++AddrLib::~AddrLib()
++{
++    if (m_pElemLib != NULL) // the element lib is only attached by AddrLib::Create()
++    {
++        delete m_pElemLib;
++    }
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Initialization/Helper
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrLib::Create
++*
++*   @brief
++*       Creates and initializes AddrLib object.
++*
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::Create(
++    const ADDR_CREATE_INPUT* pCreateIn,     ///< [in] creation parameters; used without a NULL check
++    ADDR_CREATE_OUTPUT*      pCreateOut)    ///< [out] hLib receives the new library, NULL on failure
++{
++    AddrLib* pLib = NULL;                   // stays NULL unless a HWL object is created below
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (pCreateIn->createFlags.fillSizeFields == TRUE) // sizes are checked only when client opts in
++    {
++        if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) ||
++            (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if ((returnCode == ADDR_OK)                    &&
++        (pCreateIn->callbacks.allocSysMem != NULL) &&
++        (pCreateIn->callbacks.freeSysMem != NULL)) // both memory callbacks are required
++    {
++        AddrClient client = {
++            pCreateIn->hClient,
++            pCreateIn->callbacks
++        };
++
++        switch (pCreateIn->chipEngine)
++        {
++            case CIASICIDGFXENGINE_SOUTHERNISLAND:
++                switch (pCreateIn->chipFamily)
++                {
++                    case FAMILY_SI:
++                        pLib = AddrSIHwlInit(&client);
++                        break;
++                    case FAMILY_VI:
++                    case FAMILY_CZ: // VI based fusion(carrizo)
++                    case FAMILY_CI:
++                    case FAMILY_KV: // CI based fusion
++                        pLib = AddrCIHwlInit(&client);
++                        break;
++                    default:
++                        ADDR_ASSERT_ALWAYS(); // chip family not handled: pLib stays NULL
++                        break;
++                }
++                break;
++            default:
++                ADDR_ASSERT_ALWAYS(); // chip engine not handled: pLib stays NULL
++                break;
++        }
++    }
++
++    if ((pLib != NULL))
++    {
++        BOOL_32 initValid;
++
++        // Pass createFlags to configFlags first since these flags may be overwritten
++        pLib->m_configFlags.noCubeMipSlicesPad  = pCreateIn->createFlags.noCubeMipSlicesPad;
++        pLib->m_configFlags.fillSizeFields      = pCreateIn->createFlags.fillSizeFields;
++        pLib->m_configFlags.useTileIndex        = pCreateIn->createFlags.useTileIndex;
++        pLib->m_configFlags.useCombinedSwizzle  = pCreateIn->createFlags.useCombinedSwizzle;
++        pLib->m_configFlags.checkLast2DLevel    = pCreateIn->createFlags.checkLast2DLevel;
++        pLib->m_configFlags.useHtileSliceAlign  = pCreateIn->createFlags.useHtileSliceAlign;
++        pLib->m_configFlags.degradeBaseLevel    = pCreateIn->createFlags.degradeBaseLevel;
++        pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
++
++        pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
++
++        pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
++
++        // Global parameters initialized and remaining configFlags bits are set as well
++        initValid = pLib->HwlInitGlobalParams(pCreateIn);
++
++        if (initValid)
++        {
++            pLib->m_pElemLib = AddrElemLib::Create(pLib);
++        }
++        else
++        {
++            pLib->m_pElemLib = NULL; // Don't go on allocating element lib
++            returnCode = ADDR_INVALIDGBREGVALUES;
++        }
++
++        if (pLib->m_pElemLib == NULL) // HWL init failed or the element lib could not be created
++        {
++            delete pLib;
++            pLib = NULL;
++            ADDR_ASSERT_ALWAYS();
++        }
++        else
++        {
++            pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags);
++        }
++    }
++
++    pCreateOut->hLib = pLib; // NULL on every failure path
++
++    if ((pLib == NULL) &&
++        (returnCode == ADDR_OK))
++    {
++        // Unknown failures, we return the general error code
++        returnCode = ADDR_ERROR;
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::SetAddrChipFamily
++*
++*   @brief
++*       Convert familyID defined in atiid.h to AddrChipFamily and set m_chipFamily/m_chipRevision
++*   @return
++*      N/A
++***************************************************************************************************
++*/
++VOID AddrLib::SetAddrChipFamily(
++    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
++    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
++{
++    // The hardware layer translates the raw family/revision pair
++    const AddrChipFamily convertedFamily = HwlConvertChipFamily(uChipFamily, uChipRevision);
++
++    // The hardware layer is expected to recognise the family
++    ADDR_ASSERT(convertedFamily != ADDR_CHIP_FAMILY_IVLD);
++
++    m_chipRevision  = uChipRevision;
++    m_chipFamily    = convertedFamily;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::SetMinPitchAlignPixels
++*
++*   @brief
++*       Set m_minPitchAlignPixels with input param
++*
++*   @return
++*      N/A
++***************************************************************************************************
++*/
++VOID AddrLib::SetMinPitchAlignPixels(
++    UINT_32 minPitchAlignPixels)    ///< [in] minimum pitch alignment in pixels (0 selects 1)
++{
++    m_minPitchAlignPixels = (minPitchAlignPixels != 0) ? minPitchAlignPixels : 1;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::GetAddrLib
++*
++*   @brief
++*       Get AddrLib pointer
++*
++*   @return
++*      An AddrLib class pointer
++***************************************************************************************************
++*/
++AddrLib * AddrLib::GetAddrLib(
++    ADDR_HANDLE hLib)   ///< [in] library handle, as stored in ADDR_CREATE_OUTPUT::hLib by Create()
++{
++    return static_cast<AddrLib *>(hLib); // cast back without any validation of the handle
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Surface Methods
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceInfo
++*
++*   @brief
++*       Interface function stub of AddrComputeSurfaceInfo.
++*
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
++     const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
++     ADDR_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
++     ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    // We suggest client do sanity check but a check here is also good
++    if (pIn->bpp > 128)
++    {
++        returnCode = ADDR_INVALIDPARAMS;
++    }
++
++    // Thick modes don't support multisample
++    if (ComputeSurfaceThickness(pIn->tileMode) > 1 && pIn->numSamples > 1)
++    {
++        returnCode = ADDR_INVALIDPARAMS;
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        // Get a local copy of input structure and only reference pIn for unadjusted values
++        ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
++        ADDR_TILEINFO tileInfoNull = {0};
++
++        if (UseTileInfo())
++        {
++            // If the original input has a valid ADDR_TILEINFO pointer then copy its contents.
++            // Otherwise the default 0's in tileInfoNull are used.
++            if (pIn->pTileInfo)
++            {
++                tileInfoNull = *pIn->pTileInfo;
++            }
++            localIn.pTileInfo  = &tileInfoNull;
++        }
++
++        localIn.numSamples = pIn->numSamples == 0 ? 1 : pIn->numSamples;
++
++        // Do mipmap check first
++        // If format is BCn, pre-pad dimension to power-of-two according to HWL
++        ComputeMipLevel(&localIn);
++
++        if (m_configFlags.checkLast2DLevel)
++        {
++            // Save this level's original height in pixels
++            pOut->height = pIn->height;
++        }
++
++        UINT_32 expandX = 1;
++        UINT_32 expandY = 1;
++        AddrElemMode elemMode;
++
++        // Save outputs that may not go through HWL
++        pOut->pixelBits = localIn.bpp;
++        pOut->numSamples = localIn.numSamples;
++        pOut->last2DLevel = FALSE;
++
++#if !ALT_TEST
++        if (localIn.numSamples > 1)
++        {
++            ADDR_ASSERT(localIn.mipLevel == 0);
++        }
++#endif
++
++        if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion
++        {
++            // Get compression/expansion factors and element mode
++            // (which indicates compression/expansion
++            localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format,
++                                                        &elemMode,
++                                                        &expandX,
++                                                        &expandY);
++
++            // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is
++            // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear-
++            // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw
++            // restrictions are different.
++            // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround
++            // but we use this flag to skip RestoreSurfaceInfo below
++
++            if ((elemMode == ADDR_EXPANDED) &&
++                (expandX > 1))
++            {
++                ADDR_ASSERT(localIn.tileMode == ADDR_TM_LINEAR_ALIGNED || localIn.height == 1);
++            }
++
++            GetElemLib()->AdjustSurfaceInfo(elemMode,
++                                            expandX,
++                                            expandY,
++                                            &localIn.bpp,
++                                            &localIn.basePitch,
++                                            &localIn.width,
++                                            &localIn.height);
++
++            // Overwrite these parameters if we have a valid format
++        }
++        else if (localIn.bpp != 0)
++        {
++            localIn.width  = (localIn.width != 0) ? localIn.width : 1;
++            localIn.height = (localIn.height != 0) ? localIn.height : 1;
++        }
++        else // Rule out some invalid parameters
++        {
++            ADDR_ASSERT_ALWAYS();
++
++            returnCode = ADDR_INVALIDPARAMS;
++        }
++
++        // Check mipmap after surface expansion
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = PostComputeMipLevel(&localIn, pOut);
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            if (UseTileIndex(localIn.tileIndex))
++            {
++                // Make sure pTileInfo is not NULL
++                ADDR_ASSERT(localIn.pTileInfo);
++
++                UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags);
++
++                INT_32 macroModeIndex = TileIndexNoMacroIndex;
++
++                if (localIn.tileIndex != TileIndexLinearGeneral)
++                {
++                    // Try finding a macroModeIndex
++                    macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex,
++                                                              localIn.flags,
++                                                              localIn.bpp,
++                                                              numSamples,
++                                                              localIn.pTileInfo,
++                                                              &localIn.tileMode,
++                                                              &localIn.tileType);
++                }
++
++                // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
++                if (macroModeIndex == TileIndexNoMacroIndex)
++                {
++                    returnCode = HwlSetupTileCfg(localIn.tileIndex, macroModeIndex,
++                                                 localIn.pTileInfo,
++                                                 &localIn.tileMode, &localIn.tileType);
++                }
++                // If macroModeIndex is invalid, then assert this is not macro tiled
++                else if (macroModeIndex == TileIndexInvalid)
++                {
++                    ADDR_ASSERT(!IsMacroTiled(localIn.tileMode));
++                }
++            }
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            AddrTileMode tileMode = localIn.tileMode;
++            AddrTileType tileType = localIn.tileType;
++
++            // HWL layer may override tile mode if necessary
++            if (HwlOverrideTileMode(&localIn, &tileMode, &tileType))
++            {
++                localIn.tileMode = tileMode;
++                localIn.tileType = tileType;
++            }
++            // Degrade base level if applicable
++            if (DegradeBaseLevel(&localIn, &tileMode))
++            {
++                localIn.tileMode = tileMode;
++            }
++        }
++
++        // Call main function to compute surface info
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            // Since bpp might be changed we just pass it through
++            pOut->bpp  = localIn.bpp;
++
++            // Also original width/height/bpp
++            pOut->pixelPitch    = pOut->pitch;
++            pOut->pixelHeight   = pOut->height;
++
++#if DEBUG
++            if (localIn.flags.display)
++            {
++                ADDR_ASSERT((pOut->pitchAlign % 32) == 0);
++            }
++#endif //DEBUG
++
++            if (localIn.format != ADDR_FMT_INVALID)
++            {
++                //
++                // 96 bits surface of level 1+ requires element pitch of 32 bits instead
++                // In hwl function we skip multiplication of 3 then we should skip division of 3
++                // We keep pitch that represents 32 bit element instead of 96 bits since we
++                // will get an odd number if divided by 3.
++                //
++                if (!((expandX == 3) && (localIn.mipLevel > 0)))
++                {
++
++                    GetElemLib()->RestoreSurfaceInfo(elemMode,
++                                                     expandX,
++                                                     expandY,
++                                                     &localIn.bpp,
++                                                     &pOut->pixelPitch,
++                                                     &pOut->pixelHeight);
++                }
++            }
++
++            if (localIn.flags.qbStereo)
++            {
++                if (pOut->pStereoInfo)
++                {
++                    ComputeQbStereoInfo(pOut);
++                }
++            }
++
++            if (localIn.flags.volume) // For volume sliceSize equals to all z-slices
++            {
++                pOut->sliceSize = pOut->surfSize;
++            }
++            else // For array: sliceSize is likely to have slice-padding (the last one)
++            {
++                pOut->sliceSize = pOut->surfSize / pOut->depth;
++
++                // array or cubemap
++                if (pIn->numSlices > 1)
++                {
++                    // If this is the last slice then add the padding size to this slice
++                    if (pIn->slice == (pIn->numSlices - 1))
++                    {
++                        pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices);
++                    }
++                    else if (m_configFlags.checkLast2DLevel)
++                    {
++                        // Reset last2DLevel flag if this is not the last array slice
++                        pOut->last2DLevel = FALSE;
++                    }
++                }
++            }
++
++            pOut->pitchTileMax = pOut->pitch / 8 - 1;
++            pOut->heightTileMax = pOut->height / 8 - 1;
++            pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1;
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceAddrFromCoord
++*
++*   @brief
++*       Interface function stub of ComputeSurfaceAddrFromCoord. Resolves tile settings from the
++*       tile index when one is in use, then calls the HWL; on success sets pOut->prtBlockIndex.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeSurfaceAddrFromCoord(
++    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            const ADDR_SURFACE_FLAGS flags = {{0}};
++            UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags);
++
++            // Try finding a macroModeIndex
++            INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex,
++                                                             flags,
++                                                             input.bpp,
++                                                             numSamples,
++                                                             input.pTileInfo,
++                                                             &input.tileMode,
++                                                             &input.tileType);
++
++            // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
++            if (macroModeIndex == TileIndexNoMacroIndex)
++            {
++                returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex,
++                                             input.pTileInfo, &input.tileMode, &input.tileType);
++            }
++            // If macroModeIndex is invalid, then assert this is not macro tiled
++            else if (macroModeIndex == TileIndexInvalid)
++            {
++                ADDR_ASSERT(!IsMacroTiled(input.tileMode));
++            }
++
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut);
++
++            if (returnCode == ADDR_OK)
++            {
++                pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
++            }
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceCoordFromAddr
++*
++*   @brief
++*       Interface function stub of ComputeSurfaceCoordFromAddr. Resolves tile settings from the
++*       tile index when one is in use, then calls HwlComputeSurfaceCoordFromAddr.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeSurfaceCoordFromAddr(
++    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            const ADDR_SURFACE_FLAGS flags = {{0}};
++            UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags);
++
++            // Try finding a macroModeIndex
++            INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex,
++                                                             flags,
++                                                             input.bpp,
++                                                             numSamples,
++                                                             input.pTileInfo,
++                                                             &input.tileMode,
++                                                             &input.tileType);
++
++            // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
++            if (macroModeIndex == TileIndexNoMacroIndex)
++            {
++                returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex,
++                                             input.pTileInfo, &input.tileMode, &input.tileType);
++            }
++            // If macroModeIndex is invalid, then assert this is not macro tiled
++            else if (macroModeIndex == TileIndexInvalid)
++            {
++                ADDR_ASSERT(!IsMacroTiled(input.tileMode));
++            }
++
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlComputeSurfaceCoordFromAddr(pIn, pOut);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSliceTileSwizzle
++*
++*   @brief
++*       Interface function stub of ComputeSliceTileSwizzle. When a tile index is in use, tile
++*       info and tile mode are first set up by HwlSetupTileCfg on a local copy of the input.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeSliceTileSwizzle(
++    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_SLICESWIZZLE_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex,
++                                         input.pTileInfo, &input.tileMode);
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlComputeSliceTileSwizzle(pIn, pOut);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ExtractBankPipeSwizzle
++*
++*   @brief
++*       Interface function stub of AddrExtractBankPipeSwizzle. When a tile index is in use,
++*       tile info is first set up by HwlSetupTileCfg on a local copy of the input.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ExtractBankPipeSwizzle(
++    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
++    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) ||
++            (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlExtractBankPipeSwizzle(pIn, pOut);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::CombineBankPipeSwizzle
++*
++*   @brief
++*       Interface function stub of AddrCombineBankPipeSwizzle. Size fields are validated against
++*       the ADDR_COMBINE_BANKPIPE_SWIZZLE_* structures; the result goes to pOut->tileSwizzle.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::CombineBankPipeSwizzle(
++    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle,
++                                                   pIn->pipeSwizzle,
++                                                   pIn->pTileInfo,
++                                                   pIn->baseAddr,
++                                                   &pOut->tileSwizzle);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeBaseSwizzle
++*
++*   @brief
++*       Interface function stub of AddrComputeBaseSwizzle.
++*   @return
++*       ADDR_E_RETURNCODE; pOut->tileSwizzle is set to 0 when pIn->tileMode is not macro tiled
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeBaseSwizzle(
++    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
++    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_BASE_SWIZZLE_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            if (IsMacroTiled(pIn->tileMode))
++            {
++                returnCode = HwlComputeBaseSwizzle(pIn, pOut);
++            }
++            else
++            {
++                pOut->tileSwizzle = 0;
++            }
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeFmaskInfo
++*
++*   @brief
++*       Interface function stub of ComputeFmaskInfo. Rejects tile modes with thickness > 1 and
++*       inputs with numSamples <= 1 (pOut is zeroed in the latter case).
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH takes precedence over ADDR_INVALIDPARAMS
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeFmaskInfo(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,    ///< [in] input structure
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
++    )
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    // No thick MSAA; skipped after a size mismatch so that error is not overwritten
++    if ((returnCode == ADDR_OK) && (ComputeSurfaceThickness(pIn->tileMode) > 1))
++    {
++        returnCode = ADDR_INVALIDPARAMS;
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_FMASK_INFO_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++
++            if (pOut->pTileInfo)
++            {
++                // Use the caller's output tile info when provided, else a local temp (below)
++                input.pTileInfo = pOut->pTileInfo;
++            }
++            else
++            {
++                input.pTileInfo = &tileInfoNull;
++            }
++
++            ADDR_SURFACE_FLAGS flags = {{0}};
++            flags.fmask = 1;
++
++            // Try finding a macroModeIndex
++            INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex,
++                                                             flags,
++                                                             HwlComputeFmaskBits(pIn, NULL),
++                                                             pIn->numSamples,
++                                                             input.pTileInfo,
++                                                             &input.tileMode);
++
++            // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
++            if (macroModeIndex == TileIndexNoMacroIndex)
++            {
++                returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex,
++                                             input.pTileInfo, &input.tileMode);
++            }
++
++            ADDR_ASSERT(macroModeIndex != TileIndexInvalid);
++
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            if (pIn->numSamples > 1)
++            {
++                returnCode = HwlComputeFmaskInfo(pIn, pOut);
++            }
++            else
++            {
++                memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT));
++
++                returnCode = ADDR_INVALIDPARAMS;
++            }
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeFmaskAddrFromCoord
++*
++*   @brief
++*       Interface function stub of ComputeFmaskAddrFromCoord. Requires pIn->numSamples > 1
++*       (asserted); the computation itself is done by HwlComputeFmaskAddrFromCoord.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_INVALIDPARAMS if pIn->numSamples <= 1
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeFmaskAddrFromCoord(
++    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_ASSERT(pIn->numSamples > 1);
++
++        if (pIn->numSamples > 1)
++        {
++            returnCode = HwlComputeFmaskAddrFromCoord(pIn, pOut);
++        }
++        else
++        {
++            returnCode = ADDR_INVALIDPARAMS;
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeFmaskCoordFromAddr
++*
++*   @brief
++*       Interface function stub of ComputeFmaskCoordFromAddr. Requires pIn->numSamples > 1
++*       (asserted); the computation itself is done by HwlComputeFmaskCoordFromAddr.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_INVALIDPARAMS if pIn->numSamples <= 1
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeFmaskCoordFromAddr(
++    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,     ///< [in] input structure
++    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut           ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_ASSERT(pIn->numSamples > 1);
++
++        if (pIn->numSamples > 1)
++        {
++            returnCode = HwlComputeFmaskCoordFromAddr(pIn, pOut);
++        }
++        else
++        {
++            returnCode = ADDR_INVALIDPARAMS;
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ConvertTileInfoToHW
++*
++*   @brief
++*       Convert tile info from real value to HW register value in HW layer. When pIn->reverse
++*       is FALSE and a tile index is in use, tile info is first set up by HwlSetupTileCfg.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ConvertTileInfoToHW(
++    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
++    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) ||
++            (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_CONVERT_TILEINFOTOHW_INPUT input;
++        // If pIn->reverse is TRUE, indices are ignored and pIn is passed on unchanged
++        if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = HwlConvertTileInfoToHW(pIn, pOut);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ConvertTileIndex
++*
++*   @brief
++*       Convert tile index to tile mode/type/info via HwlSetupTileCfg. If pIn->tileInfoHw is
++*       set, pOut->pTileInfo is then passed through HwlConvertTileInfoToHW as input and output.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ConvertTileIndex(
++    const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure
++    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut      ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) ||
++            (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++
++        returnCode = HwlSetupTileCfg(pIn->tileIndex, pIn->macroModeIndex,
++                                     pOut->pTileInfo, &pOut->tileMode, &pOut->tileType);
++
++        if (returnCode == ADDR_OK && pIn->tileInfoHw)
++        {
++            ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0};
++            ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0};
++
++            hwInput.pTileInfo = pOut->pTileInfo;
++            hwInput.tileIndex = -1;
++            hwOutput.pTileInfo = pOut->pTileInfo;
++
++            returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ConvertTileIndex1
++*
++*   @brief
++*       Convert tile index to tile mode/type/info by passing the index, bpp and numSamples to
++*       HwlComputeMacroModeIndex (its returned index is not checked here).
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ConvertTileIndex1(
++    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,   ///< [in] input structure
++    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut         ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) ||
++            (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_SURFACE_FLAGS flags = {{0}};
++
++        HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples,
++                                 pOut->pTileInfo, &pOut->tileMode, &pOut->tileType);
++
++        if (pIn->tileInfoHw)
++        {
++            ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0};
++            ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0};
++
++            hwInput.pTileInfo = pOut->pTileInfo;
++            hwInput.tileIndex = -1;
++            hwOutput.pTileInfo = pOut->pTileInfo;
++
++            returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::GetTileIndex
++*
++*   @brief
++*       Get tile index from tile mode/type/info. After the optional size-field check the
++*       request is forwarded unchanged to HwlGetTileIndex.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::GetTileIndex(
++    const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure
++    ADDR_GET_TILEINDEX_OUTPUT* pOut      ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) ||
++            (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        returnCode = HwlGetTileIndex(pIn, pOut);
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceThickness
++*
++*   @brief
++*       Look up the surface thickness of a tile mode in the m_modeFlags table. tileMode is used
++*       directly as the table index; no range check is performed here.
++*   @return
++*       Surface thickness
++***************************************************************************************************
++*/
++UINT_32 AddrLib::ComputeSurfaceThickness(
++    AddrTileMode tileMode)    ///< [in] tile mode
++{
++    return m_modeFlags[tileMode].thickness;
++}
++
++
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               CMASK/HTILE
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeHtileInfo
++*
++*   @brief
++*       Interface function stub of AddrComputeHtileInfo. pIn->blockWidth/blockHeight are compared
++*       against 8 and passed on as flags; the returned bpp is stored in pOut->bpp.
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_PARAMSIZEMISMATCH if size fields are checked and do not match
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeHtileInfo(
++    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
++    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
++    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_HTILE_INFO_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use a local temp tile info for calculation on the local copy of the input
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            // From here on work with the local copy instead of the caller's structure
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            pOut->bpp = ComputeHtileInfo(pIn->flags,
++                                         pIn->pitch,
++                                         pIn->height,
++                                         pIn->numSlices,
++                                         pIn->isLinear,
++                                         isWidth8,
++                                         isHeight8,
++                                         pIn->pTileInfo,
++                                         &pOut->pitch,
++                                         &pOut->height,
++                                         &pOut->htileBytes,
++                                         &pOut->macroWidth,
++                                         &pOut->macroHeight,
++                                         &pOut->sliceSize,
++                                         &pOut->baseAlign);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeCmaskInfo
++*
++*   @brief
++*       Interface function stub of AddrComputeCmaskInfo (size check, tile-index setup, cmask info)
++*
++*   @return
++*       ADDR_PARAMSIZEMISMATCH if the size check fails; else the setup/compute return code
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo(
++    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
++    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_CMASK_INFO_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use temp tile info for calculation
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            // Redirect pIn to the local copy so the caller's input structure is left untouched
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            returnCode = ComputeCmaskInfo(pIn->flags,
++                                          pIn->pitch,
++                                          pIn->height,
++                                          pIn->numSlices,
++                                          pIn->isLinear,
++                                          pIn->pTileInfo,
++                                          &pOut->pitch,
++                                          &pOut->height,
++                                          &pOut->cmaskBytes,
++                                          &pOut->macroWidth,
++                                          &pOut->macroHeight,
++                                          &pOut->sliceSize,
++                                          &pOut->baseAlign,
++                                          &pOut->blockMax);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeDccInfo
++*
++*   @brief
++*       Interface function to compute DCC key info
++*
++*   @return
++*       ADDR_PARAMSIZEMISMATCH or a HwlSetupTileCfg failure; else return code of HwlComputeDccInfo
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeDccInfo(
++    const ADDR_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
++    ADDR_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE ret = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT)))
++        {
++            ret = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (ret == ADDR_OK)
++    {
++        ADDR_COMPUTE_DCCINFO_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn; // work on a local copy; the caller's structure is const
++
++            ret = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex,
++                                  &input.tileInfo, &input.tileMode);
++
++            pIn = &input;
++        }
++
++        if (ADDR_OK == ret)
++        {
++            ret = HwlComputeDccInfo(pIn, pOut);
++        }
++    }
++
++    return ret;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeHtileAddrFromCoord
++*
++*   @brief
++*       Interface function stub of AddrComputeHtileAddrFromCoord
++*
++*   @return
++*       ADDR_PARAMSIZEMISMATCH if the size check fails; else the HwlSetupTileCfg code or ADDR_OK
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeHtileAddrFromCoord(
++    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
++    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use temp tile info for calculation
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            // Redirect pIn to the local copy so the caller's input structure is left untouched
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
++                                                      pIn->height,
++                                                      pIn->x,
++                                                      pIn->y,
++                                                      pIn->slice,
++                                                      pIn->numSlices,
++                                                      1, // factor: 1 selects htile
++                                                      pIn->isLinear,
++                                                      isWidth8,
++                                                      isHeight8,
++                                                      pIn->pTileInfo,
++                                                      &pOut->bitPosition);
++        }
++    }
++
++    return returnCode;
++
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeHtileCoordFromAddr
++*
++*   @brief
++*       Interface function stub of AddrComputeHtileCoordFromAddr
++*
++*   @return
++*       ADDR_PARAMSIZEMISMATCH if the size check fails; else the HwlSetupTileCfg code or ADDR_OK
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeHtileCoordFromAddr(
++    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
++    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use temp tile info for calculation
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            // Redirect pIn to the local copy so the caller's input structure is left untouched
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            HwlComputeXmaskCoordFromAddr(pIn->addr,
++                                         pIn->bitPosition,
++                                         pIn->pitch,
++                                         pIn->height,
++                                         pIn->numSlices,
++                                         1, // factor: 1 selects htile
++                                         pIn->isLinear,
++                                         isWidth8,
++                                         isHeight8,
++                                         pIn->pTileInfo,
++                                         &pOut->x,
++                                         &pOut->y,
++                                         &pOut->slice);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeCmaskAddrFromCoord
++*
++*   @brief
++*       Interface function stub of AddrComputeCmaskAddrFromCoord
++*
++*   @return
++*       ADDR_PARAMSIZEMISMATCH if the size check fails; else the setup/Hwl return code or ADDR_OK
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeCmaskAddrFromCoord(
++    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use temp tile info for calculation
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            // Redirect pIn to the local copy so the caller's input structure is left untouched
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            if (pIn->flags.tcCompatible == TRUE)
++            {
++                returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut);
++            }
++            else
++            {
++                pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
++                                                          pIn->height,
++                                                          pIn->x,
++                                                          pIn->y,
++                                                          pIn->slice,
++                                                          pIn->numSlices,
++                                                          2, // factor: 2 selects cmask
++                                                          pIn->isLinear,
++                                                          FALSE, //this is cmask, isWidth8 is not needed
++                                                          FALSE, //this is cmask, isHeight8 is not needed
++                                                          pIn->pTileInfo,
++                                                          &pOut->bitPosition);
++            }
++
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeCmaskCoordFromAddr
++*
++*   @brief
++*       Interface function stub of AddrComputeCmaskCoordFromAddr
++*
++*   @return
++*       ADDR_PARAMSIZEMISMATCH if the size check fails; else the HwlSetupTileCfg code or ADDR_OK
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeCmaskCoordFromAddr(
++    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) ||
++            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        ADDR_TILEINFO tileInfoNull;
++        ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input;
++
++        if (UseTileIndex(pIn->tileIndex))
++        {
++            input = *pIn;
++            // Use temp tile info for calculation
++            input.pTileInfo = &tileInfoNull;
++
++            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
++
++            // Redirect pIn to the local copy so the caller's input structure is left untouched
++            pIn = &input;
++        }
++
++        if (returnCode == ADDR_OK)
++        {
++            HwlComputeXmaskCoordFromAddr(pIn->addr,
++                                         pIn->bitPosition,
++                                         pIn->pitch,
++                                         pIn->height,
++                                         pIn->numSlices,
++                                         2, // factor: 2 selects cmask
++                                         pIn->isLinear,
++                                         FALSE, // this is cmask, isWidth8 is not needed
++                                         FALSE, // this is cmask, isHeight8 is not needed
++                                         pIn->pTileInfo,
++                                         &pOut->x,
++                                         &pOut->y,
++                                         &pOut->slice);
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeTileDataWidthAndHeight
++*
++*   @brief
++*       Compute the squared cache shape for per-tile data (CMASK and HTILE)
++*
++*   @return
++*       N/A
++*
++*   @note
++*       MacroWidth and macroHeight are measured in pixels
++***************************************************************************************************
++*/
++VOID AddrLib::ComputeTileDataWidthAndHeight(
++    UINT_32         bpp,             ///< [in] bits per pixel (used as a divisor, must be non-zero)
++    UINT_32         cacheBits,       ///< [in] bits of cache
++    ADDR_TILEINFO*  pTileInfo,       ///< [in] Tile info, passed to HwlGetPipes
++    UINT_32*        pMacroWidth,     ///< [out] macro tile width
++    UINT_32*        pMacroHeight     ///< [out] macro tile height
++    ) const
++{
++    UINT_32 height = 1;
++    UINT_32 width  = cacheBits / bpp;
++    UINT_32 pipes  = HwlGetPipes(pTileInfo);
++
++    // Double height (halving width) until the macro-tile is close to square
++    // Height can only be doubled if width is even
++
++    while ((width > height * 2 * pipes) && !(width & 1))
++    {
++        width  /= 2;
++        height *= 2;
++    }
++
++    *pMacroWidth  = 8 * width;
++    *pMacroHeight = 8 * height * pipes;
++
++    // Note: The above iterative computation is equivalent to the following
++    //
++    //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2);
++    //int macroHeight = pow2( 3+log2(pipes)+log2_height );
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlComputeTileDataWidthAndHeightLinear
++*
++*   @brief
++*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
++*
++*   @return
++*       N/A
++*
++*   @note
++*       MacroWidth and macroHeight are measured in pixels
++***************************************************************************************************
++*/
++VOID AddrLib::HwlComputeTileDataWidthAndHeightLinear(
++    UINT_32*        pMacroWidth,     ///< [out] macro tile width
++    UINT_32*        pMacroHeight,    ///< [out] macro tile height
++    UINT_32         bpp,             ///< [in] bits per pixel (used as a divisor, must be non-zero)
++    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info (not referenced by this implementation)
++    ) const
++{
++    ADDR_ASSERT(bpp != 4);              // Cmask does not support linear layout prior to SI
++    *pMacroWidth  = 8 * 512 / bpp;      // Align width to 512-bit memory accesses
++    *pMacroHeight = 8 * m_pipes;        // Align height to number of pipes
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeHtileInfo
++*
++*   @brief
++*       Compute htile pitch, height, total bytes, macro-tile dimensions and bytes per 2D slice
++*
++*   @return
++*       Htile bpp i.e. How many bits for an 8x8 tile
++*       Also returns by output parameters:
++*       *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size*
++***************************************************************************************************
++*/
++UINT_32 AddrLib::ComputeHtileInfo(
++    ADDR_HTILE_FLAGS flags,             ///< [in] htile flags
++    UINT_32          pitchIn,           ///< [in] pitch input
++    UINT_32          heightIn,          ///< [in] height input
++    UINT_32          numSlices,         ///< [in] number of slices
++    BOOL_32          isLinear,          ///< [in] if it is linear mode
++    BOOL_32          isWidth8,          ///< [in] if htile block width is 8
++    BOOL_32          isHeight8,         ///< [in] if htile block height is 8
++    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
++    UINT_32*         pPitchOut,         ///< [out] pitch output
++    UINT_32*         pHeightOut,        ///< [out] height output
++    UINT_64*         pHtileBytes,       ///< [out] surface bytes returned by HwlComputeHtileBytes
++    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels
++    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels
++    UINT_64*         pSliceSize,        ///< [out] slice size in bytes
++    UINT_32*         pBaseAlign         ///< [out] base alignment
++    ) const
++{
++
++    UINT_32 macroWidth;
++    UINT_32 macroHeight;
++    UINT_32 baseAlign;
++    UINT_64 surfBytes;
++    UINT_64 sliceBytes;
++
++    numSlices = Max(1u, numSlices);
++
++    const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8);
++    const UINT_32 cacheBits = HtileCacheBits;
++
++    if (isLinear)
++    {
++        HwlComputeTileDataWidthAndHeightLinear(&macroWidth,
++                                               &macroHeight,
++                                               bpp,
++                                               pTileInfo);
++    }
++    else
++    {
++        ComputeTileDataWidthAndHeight(bpp,
++                                      cacheBits,
++                                      pTileInfo,
++                                      &macroWidth,
++                                      &macroHeight);
++    }
++
++    *pPitchOut = PowTwoAlign(pitchIn,  macroWidth);
++    *pHeightOut = PowTwoAlign(heightIn,  macroHeight);
++
++    baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo);
++
++    surfBytes = HwlComputeHtileBytes(*pPitchOut,
++                                     *pHeightOut,
++                                     bpp,
++                                     isLinear,
++                                     numSlices,
++                                     &sliceBytes,
++                                     baseAlign);
++
++    *pHtileBytes = surfBytes;
++
++    //
++    // Use SafeAssign for the remaining outputs since they are optional
++    //
++    SafeAssign(pMacroWidth, macroWidth);
++
++    SafeAssign(pMacroHeight, macroHeight);
++
++    SafeAssign(pSliceSize,  sliceBytes);
++
++    SafeAssign(pBaseAlign, baseAlign);
++
++    return bpp;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeCmaskBaseAlign
++*
++*   @brief
++*       Compute cmask base alignment
++*
++*   @return
++*       Pipe interleave bytes * pipes, additionally multiplied by banks when tcCompatible is set
++***************************************************************************************************
++*/
++UINT_32 AddrLib::ComputeCmaskBaseAlign(
++    ADDR_CMASK_FLAGS flags,           ///< [in] Cmask flags
++    ADDR_TILEINFO*   pTileInfo        ///< [in] Tile info (expected non-NULL when tcCompatible)
++    ) const
++{
++    UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);
++
++    if (flags.tcCompatible)
++    {
++        ADDR_ASSERT(pTileInfo != NULL);
++        if (pTileInfo)
++        {
++            baseAlign *= pTileInfo->banks;
++        }
++    }
++
++    return baseAlign;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeCmaskBytes
++*
++*   @brief
++*       Compute cmask size in bytes (CmaskElemBits bits for every MicroTilePixels pixels)
++*
++*   @return
++*       Cmask size in bytes
++***************************************************************************************************
++*/
++UINT_64 AddrLib::ComputeCmaskBytes(
++    UINT_32 pitch,        ///< [in] pitch
++    UINT_32 height,       ///< [in] height
++    UINT_32 numSlices     ///< [in] number of slices
++    ) const
++{
++    return BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * numSlices * CmaskElemBits) /
++        MicroTilePixels;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeCmaskInfo
++*
++*   @brief
++*       Compute cmask pitch, height, total bytes, macro-tile dimensions, slice size and block max
++*
++*   @return
++*       ADDR_OK, or ADDR_INVALIDPARAMS when block max exceeds HwlGetMaxCmaskBlockMax() (the value
++*       is then clamped). All computed values are returned by output parameters.
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo(
++    ADDR_CMASK_FLAGS flags,            ///< [in] cmask flags
++    UINT_32          pitchIn,           ///< [in] pitch input
++    UINT_32          heightIn,          ///< [in] height input
++    UINT_32          numSlices,         ///< [in] number of slices
++    BOOL_32          isLinear,          ///< [in] is linear mode
++    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
++    UINT_32*         pPitchOut,         ///< [out] pitch output
++    UINT_32*         pHeightOut,        ///< [out] height output
++    UINT_64*         pCmaskBytes,       ///< [out] total bytes (slice size * number of slices)
++    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels
++    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels
++    UINT_64*         pSliceSize,        ///< [out] slice size in bytes
++    UINT_32*         pBaseAlign,        ///< [out] base alignment
++    UINT_32*         pBlockMax          ///< [out] block max == slice / 128 / 128 - 1
++    ) const
++{
++    UINT_32 macroWidth;
++    UINT_32 macroHeight;
++    UINT_32 baseAlign;
++    UINT_64 surfBytes;
++    UINT_64 sliceBytes;
++
++    numSlices = Max(1u, numSlices);
++
++    const UINT_32 bpp = CmaskElemBits;
++    const UINT_32 cacheBits = CmaskCacheBits;
++
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (isLinear)
++    {
++        HwlComputeTileDataWidthAndHeightLinear(&macroWidth,
++                                               &macroHeight,
++                                               bpp,
++                                               pTileInfo);
++    }
++    else
++    {
++        ComputeTileDataWidthAndHeight(bpp,
++                                      cacheBits,
++                                      pTileInfo,
++                                      &macroWidth,
++                                      &macroHeight);
++    }
++
++    *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1);
++    *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1);
++
++
++    sliceBytes = ComputeCmaskBytes(*pPitchOut,
++                                   *pHeightOut,
++                                   1);
++
++    baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo);
++
++    while (sliceBytes % baseAlign) // pad height by whole macro tiles until the slice is aligned
++    {
++        *pHeightOut += macroHeight;
++
++        sliceBytes = ComputeCmaskBytes(*pPitchOut,
++                                       *pHeightOut,
++                                       1);
++    }
++
++    surfBytes = sliceBytes * numSlices;
++
++    *pCmaskBytes = surfBytes;
++
++    //
++    // Use SafeAssign for the remaining outputs since they are optional
++    //
++    SafeAssign(pMacroWidth, macroWidth);
++
++    SafeAssign(pMacroHeight, macroHeight);
++
++    SafeAssign(pBaseAlign, baseAlign);
++
++    SafeAssign(pSliceSize, sliceBytes);
++
++    UINT_32 slice = (*pPitchOut) * (*pHeightOut);
++    UINT_32 blockMax = slice / 128 / 128 - 1; // NOTE(review): wraps if slice < 128*128 (UINT_32 presumably unsigned)
++
++#if DEBUG
++    if (slice % (64*256) != 0)
++    {
++        ADDR_ASSERT_ALWAYS();
++    }
++#endif //DEBUG
++
++    UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax();
++
++    if (blockMax > maxBlockMax)
++    {
++        blockMax = maxBlockMax;
++        returnCode = ADDR_INVALIDPARAMS;
++    }
++
++    SafeAssign(pBlockMax, blockMax);
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeXmaskCoordYFromPipe
++*
++*   @brief
++*       Compute the Y coord from pipe number for cmask/htile
++*
++*   @return
++*       Y coordinate (0 when m_pipes is not 1, 2, 4 or 8)
++*
++***************************************************************************************************
++*/
++UINT_32 AddrLib::ComputeXmaskCoordYFromPipe(
++    UINT_32         pipe,       ///< [in] pipe number
++    UINT_32         x           ///< [in] x coordinate
++    ) const
++{
++    UINT_32 pipeBit0;
++    UINT_32 pipeBit1;
++    UINT_32 xBit0;
++    UINT_32 xBit1;
++    UINT_32 yBit0;
++    UINT_32 yBit1;
++
++    UINT_32 y = 0;
++
++    UINT_32 numPipes = m_pipes; // SI has its implementation
++    //
++    // Convert pipe + x to y coordinate by inverting the XOR pipe equations shown per case.
++    //
++    switch (numPipes)
++    {
++        case 1:
++            //
++            // 1 pipe
++            //
++            // p0 = 0
++            //
++            y = 0;
++            break;
++        case 2:
++            //
++            // 2 pipes
++            //
++            // p0 = x0 ^ y0
++            //
++            // y0 = p0 ^ x0
++            //
++            pipeBit0 = pipe & 0x1;
++
++            xBit0 = x & 0x1;
++
++            yBit0 = pipeBit0 ^ xBit0;
++
++            y = yBit0;
++            break;
++        case 4:
++            //
++            // 4 pipes
++            //
++            // p0 = x1 ^ y0
++            // p1 = x0 ^ y1
++            //
++            // y0 = p0 ^ x1
++            // y1 = p1 ^ x0
++            //
++            pipeBit0 =  pipe & 0x1;
++            pipeBit1 = (pipe & 0x2) >> 1;
++
++            xBit0 =  x & 0x1;
++            xBit1 = (x & 0x2) >> 1;
++
++            yBit0 = pipeBit0 ^ xBit1;
++            yBit1 = pipeBit1 ^ xBit0;
++
++            y = (yBit0 |
++                 (yBit1 << 1));
++            break;
++        case 8:
++            //
++            // 8 pipes
++            //
++            // r600 and r800 have different method
++            //
++            y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x);
++            break;
++        default: // any other pipe count leaves y at 0
++            break;
++    }
++    return y;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlComputeXmaskCoordFromAddr
++*
++*   @brief
++*       Compute the coord from an address of a cmask/htile
++*
++*   @return
++*       N/A
++*
++*   @note
++*       This method is reused by htile, so rename to Xmask
++***************************************************************************************************
++*/
++VOID AddrLib::HwlComputeXmaskCoordFromAddr(
++    UINT_64         addr,           ///< [in] address
++    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
++    UINT_32         pitch,          ///< [in] pitch
++    UINT_32         height,         ///< [in] height
++    UINT_32         numSlices,      ///< [in] number of slices
++    UINT_32         factor,         ///< [in] factor that indicates cmask (2) or htile (otherwise)
++    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
++    BOOL_32         isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
++    BOOL_32         isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
++    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
++    UINT_32*        pX,             ///< [out] x coord
++    UINT_32*        pY,             ///< [out] y coord
++    UINT_32*        pSlice          ///< [out] slice index
++    ) const
++{
++    UINT_32 pipe;
++    UINT_32 numPipes;
++    UINT_32 numGroupBits;
++    UINT_32 numPipeBits;
++    UINT_32 macroTilePitch;
++    UINT_32 macroTileHeight;
++
++    UINT_64 bitAddr;
++
++    UINT_32 microTileCoordY;
++
++    UINT_32 elemBits;
++
++    UINT_32 pitchAligned = pitch;
++    UINT_32 heightAligned = height;
++    UINT_64 totalBytes;
++
++    UINT_64 elemOffset;
++
++    UINT_64 macroIndex;
++    UINT_32 microIndex;
++
++    UINT_64 macroNumber;
++    UINT_32 microNumber;
++
++    UINT_32 macroX;
++    UINT_32 macroY;
++    UINT_32 macroZ;
++
++    UINT_32 microX;
++    UINT_32 microY;
++
++    UINT_32 tilesPerMacro;
++    UINT_32 macrosPerPitch;
++    UINT_32 macrosPerSlice;
++
++    //
++    // Extract pipe.
++    //
++    numPipes = HwlGetPipes(pTileInfo);
++    pipe = ComputePipeFromAddr(addr, numPipes);
++
++    //
++    // Compute the number of group and pipe bits.
++    //
++    numGroupBits = Log2(m_pipeInterleaveBytes); // NOTE(review): assigned but not read again in this function
++    numPipeBits  = Log2(numPipes);
++
++    UINT_32 groupBits = 8 * m_pipeInterleaveBytes;
++    UINT_32 pipes = numPipes;
++
++
++    //
++    // Compute the micro tile size, in bits. And macro tile pitch and height.
++    //
++    if (factor == 2) //CMASK
++    {
++        ADDR_CMASK_FLAGS flags = {{0}};
++
++        elemBits = CmaskElemBits;
++
++        // Return code is ignored; only the aligned dims and macro-tile dims are used below
++        ComputeCmaskInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         pTileInfo,
++                         &pitchAligned,
++                         &heightAligned,
++                         &totalBytes,
++                         &macroTilePitch,
++                         &macroTileHeight);
++    }
++    else  //HTILE
++    {
++        ADDR_HTILE_FLAGS flags = {{0}};
++
++        if (factor != 1) // any factor other than 2 is treated as htile
++        {
++            factor = 1;
++        }
++
++        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
++
++        ComputeHtileInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         isWidth8,
++                         isHeight8,
++                         pTileInfo,
++                         &pitchAligned,
++                         &heightAligned,
++                         &totalBytes,
++                         &macroTilePitch,
++                         &macroTileHeight);
++    }
++
++    // Should use aligned dims
++    //
++    pitch = pitchAligned;
++    height = heightAligned;
++
++
++    //
++    // Convert byte address to bit address.
++    //
++    bitAddr = BYTES_TO_BITS(addr) + bitPosition;
++
++
++    //
++    // Remove pipe bits from address.
++    //
++
++    bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits);
++
++
++    elemOffset = bitAddr / elemBits;
++
++    tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits;
++
++    macrosPerPitch = pitch / (macroTilePitch/factor);
++    macrosPerSlice = macrosPerPitch * height / macroTileHeight;
++
++    macroIndex = elemOffset / factor / tilesPerMacro;
++    microIndex = static_cast<UINT_32>(elemOffset % (tilesPerMacro * factor));
++
++    macroNumber = macroIndex * factor + microIndex % factor;
++    microNumber = microIndex / factor;
++
++    macroX = static_cast<UINT_32>((macroNumber % macrosPerPitch));
++    macroY = static_cast<UINT_32>((macroNumber % macrosPerSlice) / macrosPerPitch);
++    macroZ = static_cast<UINT_32>((macroNumber / macrosPerSlice));
++
++
++    microX = microNumber % (macroTilePitch / factor / MicroTileWidth);
++    microY = (microNumber / (macroTilePitch / factor / MicroTileHeight));
++
++    *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth;
++    *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits);
++    *pSlice = macroZ;
++
++    microTileCoordY = ComputeXmaskCoordYFromPipe(pipe,
++                                                 *pX/MicroTileWidth);
++
++
++    //
++    // Assemble final coordinates.
++    //
++    *pY += microTileCoordY * MicroTileHeight;
++
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlComputeXmaskAddrFromCoord
++*
++*   @brief
++*       Compute the address from a coordinate of cmask or htile (prior to si)
++*
++*   @return
++*       Address in bytes
++*
++***************************************************************************************************
++*/
++UINT_64 AddrLib::HwlComputeXmaskAddrFromCoord(
++    UINT_32        pitch,          ///< [in] pitch
++    UINT_32        height,         ///< [in] height
++    UINT_32        x,              ///< [in] x coord
++    UINT_32        y,              ///< [in] y coord
++    UINT_32        slice,          ///< [in] slice/depth index
++    UINT_32        numSlices,      ///< [in] number of slices
++    UINT_32        factor,         ///< [in] 2 selects cmask; any other value is treated as htile(1)
++    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout (forced to tiled for cmask)
++    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
++    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
++    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
++    UINT_32*       pBitPosition    ///< [out] bit position inside a byte (0 or 4)
++    ) const
++{
++    UINT_64 addr;
++    UINT_32 numGroupBits;
++    UINT_32 numPipeBits;
++    UINT_32 newPitch = 0;
++    UINT_32 newHeight = 0;
++    UINT_64 sliceBytes = 0;
++    UINT_64 totalBytes = 0;
++    UINT_64 sliceOffset;
++    UINT_32 pipe;
++    UINT_32 macroTileWidth;
++    UINT_32 macroTileHeight;
++    UINT_32 macroTilesPerRow;
++    UINT_32 macroTileBytes;
++    UINT_32 macroTileIndexX;
++    UINT_32 macroTileIndexY;
++    UINT_64 macroTileOffset;
++    UINT_32 pixelBytesPerRow;
++    UINT_32 pixelOffsetX;
++    UINT_32 pixelOffsetY;
++    UINT_32 pixelOffset;
++    UINT_64 totalOffset;
++    UINT_64 offsetLo;
++    UINT_64 offsetHi;
++    UINT_64 groupMask;
++
++
++    UINT_32 elemBits = 0; // bits per cmask/htile element, chosen below
++
++    UINT_32 numPipes = m_pipes; // This function is accessed prior to si only
++
++    if (factor == 2) //CMASK
++    {
++        elemBits = CmaskElemBits;
++
++        // For asics before SI, cmask is always tiled
++        isLinear = FALSE;
++    }
++    else //HTILE
++    {
++        if (factor != 1) // Normalize any non-cmask factor to 1 (also fixes a compile warning)
++        {
++            factor = 1;
++        }
++
++        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
++    }
++
++    //
++    // Compute the number of group bits and pipe bits.
++    //
++    numGroupBits = Log2(m_pipeInterleaveBytes);
++    numPipeBits  = Log2(numPipes);
++
++    //
++    // Compute macro tile dimensions.
++    //
++    if (factor == 2) // CMASK
++    {
++        ADDR_CMASK_FLAGS flags = {{0}};
++
++        ComputeCmaskInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         pTileInfo,
++                         &newPitch,
++                         &newHeight,
++                         &totalBytes,
++                         &macroTileWidth,
++                         &macroTileHeight);
++
++        sliceBytes = totalBytes / numSlices; // NOTE(review): assumes numSlices != 0 - confirm callers
++    }
++    else // HTILE
++    {
++        ADDR_HTILE_FLAGS flags = {{0}};
++
++        ComputeHtileInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         isWidth8,
++                         isHeight8,
++                         pTileInfo,
++                         &newPitch,
++                         &newHeight,
++                         &totalBytes,
++                         &macroTileWidth,
++                         &macroTileHeight,
++                         &sliceBytes);
++    }
++
++    sliceOffset = slice * sliceBytes;
++
++    //
++    // Get the pipe.  Note that neither slice rotation nor pipe swizzling apply for CMASK.
++    //
++    pipe = ComputePipeFromCoord(x,
++                                y,
++                                0,
++                                ADDR_TM_2D_TILED_THIN1,
++                                0,
++                                FALSE,
++                                pTileInfo);
++
++    //
++    // Compute the number of macro tiles per row.
++    //
++    macroTilesPerRow = newPitch / macroTileWidth;
++
++    //
++    // Compute the number of bytes per macro tile.
++    //
++    macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels);
++
++    //
++    // Compute the offset to the macro tile containing the specified coordinate.
++    //
++    macroTileIndexX = x / macroTileWidth;
++    macroTileIndexY = y / macroTileHeight;
++    macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
++
++    //
++    // Compute the pixel offset within the macro tile.
++    //
++    pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth;
++
++    //
++    // The nibbles are interleaved (see below), so the part of the offset relative to the x
++    // coordinate repeats halfway across the row. (Not for HTILE)
++    //
++    if (factor == 2)
++    {
++        pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth;
++    }
++    else
++    {
++        pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits);
++    }
++
++    //
++    // Compute the y offset within the macro tile.
++    //
++    pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow;
++
++    pixelOffset = pixelOffsetX + pixelOffsetY;
++
++    //
++    // Combine the slice offset and macro tile offset with the pixel offset, accounting for the
++    // pipe bits in the middle of the address.
++    //
++    totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset;
++
++    //
++    // Split the offset to put some bits below the pipe bits and some above.
++    //
++    groupMask = (1 << numGroupBits) - 1; // NOTE(review): shift is done in int before widening
++    offsetLo  = totalOffset &  groupMask;
++    offsetHi  = (totalOffset & ~groupMask) << numPipeBits;
++
++    //
++    // Assemble the address from its components.
++    //
++    addr  = offsetLo;
++    addr |= offsetHi;
++    // The 32-bit temporary is here to remove a warning with the /analyze option
++    UINT_32 pipeBits = pipe << numGroupBits;
++    addr |= pipeBits;
++
++    //
++    // Compute the bit position.  The lower nibble is used when the x coordinate within the macro
++    // tile is less than half of the macro tile width, and the upper nibble is used when the x
++    // coordinate within the macro tile is greater than or equal to half the macro tile width.
++    // With factor 1 (HTILE) the comparison below always holds, so the result is 0.
++    *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 0 : 4;
++
++    return addr;
++}
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Surface Addressing Shared
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceAddrFromCoordLinear
++*
++*   @brief
++*       Compute address from coord for linear surface
++*
++*   @return
++*       Address in bytes
++*
++***************************************************************************************************
++*/
++UINT_64 AddrLib::ComputeSurfaceAddrFromCoordLinear(
++    UINT_32  x,              ///< [in] x coord
++    UINT_32  y,              ///< [in] y coord
++    UINT_32  slice,          ///< [in] slice/depth index
++    UINT_32  sample,         ///< [in] sample index
++    UINT_32  bpp,            ///< [in] bits per pixel
++    UINT_32  pitch,          ///< [in] pitch
++    UINT_32  height,         ///< [in] height
++    UINT_32  numSlices,      ///< [in] number of slices
++    UINT_32* pBitPosition    ///< [out] bit position inside a byte
++    ) const
++{
++    // Pixels per slice; pitch is widened first so the product is computed in 64 bits.
++    const UINT_64 pixelsPerSlice = static_cast<UINT_64>(pitch) * height;
++    // Each sample occupies a whole group of numSlices slices.
++    const UINT_32 planeIndex     = slice + sample * numSlices;
++    const UINT_64 pixelIndex     = planeIndex * pixelsPerSlice +
++                                   static_cast<UINT_64>(y) * pitch +
++                                   x;
++    const UINT_64 bitOffset      = pixelIndex * bpp;
++
++    *pBitPosition = static_cast<UINT_32>(bitOffset % 8);
++
++    return bitOffset / 8;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceCoordFromAddrLinear
++*
++*   @brief
++*       Compute the coord from an address of a linear surface
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrLib::ComputeSurfaceCoordFromAddrLinear(
++    UINT_64  addr,           ///< [in] address
++    UINT_32  bitPosition,    ///< [in] bitPosition in a byte
++    UINT_32  bpp,            ///< [in] bits per pixel
++    UINT_32  pitch,          ///< [in] pitch
++    UINT_32  height,         ///< [in] height
++    UINT_32  numSlices,      ///< [in] number of slices
++    UINT_32* pX,             ///< [out] x coord
++    UINT_32* pY,             ///< [out] y coord
++    UINT_32* pSlice,         ///< [out] slice/depth index
++    UINT_32* pSample         ///< [out] sample index
++    ) const
++{
++    const UINT_64 pixelsPerSlice = static_cast<UINT_64>(pitch) * height;
++    const UINT_64 pixelIndex     = (BYTES_TO_BITS(addr) + bitPosition) / bpp;
++    const UINT_64 inSliceIndex   = pixelIndex % pixelsPerSlice; // pixel index inside its slice
++    *pX      = static_cast<UINT_32>(inSliceIndex % pitch);
++    *pY      = static_cast<UINT_32>(inSliceIndex / pitch % height);
++    *pSlice  = static_cast<UINT_32>((pixelIndex / pixelsPerSlice) % numSlices);
++    *pSample = static_cast<UINT_32>((pixelIndex / pixelsPerSlice) / numSlices);
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeSurfaceCoordFromAddrMicroTiled
++*
++*   @brief
++*       Compute the coord from an address of a micro tiled surface
++*
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID AddrLib::ComputeSurfaceCoordFromAddrMicroTiled(
++    UINT_64         addr,               ///< [in] address
++    UINT_32         bitPosition,        ///< [in] bitPosition in a byte
++    UINT_32         bpp,                ///< [in] bits per pixel
++    UINT_32         pitch,              ///< [in] pitch
++    UINT_32         height,             ///< [in] height
++    UINT_32         numSamples,         ///< [in] number of samples
++    AddrTileMode    tileMode,           ///< [in] tile mode
++    UINT_32         tileBase,           ///< [in] base offset within a tile
++    UINT_32         compBits,           ///< [in] component bits actually needed(for planar surface)
++    UINT_32*        pX,                 ///< [out] x coord
++    UINT_32*        pY,                 ///< [out] y coord
++    UINT_32*        pSlice,             ///< [out] slice/depth index
++    UINT_32*        pSample,            ///< [out] sample index (forced to 0 for thick tiles)
++    AddrTileType    microTileType,      ///< [in] micro tiling order
++    BOOL_32         isDepthSampleOrder  ///< [in] TRUE if in depth sample order
++    ) const
++{
++    UINT_64 bitAddr;
++    UINT_32 microTileThickness;
++    UINT_32 microTileBits;
++    UINT_64 sliceBits;
++    UINT_64 rowBits;
++    UINT_32 sliceIndex;
++    UINT_32 microTileCoordX;
++    UINT_32 microTileCoordY;
++    UINT_32 pixelOffset;
++    UINT_32 pixelCoordX = 0;
++    UINT_32 pixelCoordY = 0;
++    UINT_32 pixelCoordZ = 0;
++    UINT_32 pixelCoordS = 0;
++
++    //
++    // Convert byte address to bit address.
++    //
++    bitAddr = BYTES_TO_BITS(addr) + bitPosition;
++
++    //
++    // Pick the micro tile thickness, then compute the micro tile size, in bits.
++    //
++    switch (tileMode)
++    {
++        case ADDR_TM_1D_TILED_THICK:
++            microTileThickness = ThickTileThickness;
++            break;
++        default:
++            microTileThickness = 1;
++            break;
++    }
++
++    microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;
++
++    //
++    // Compute number of bits per slice and number of bits per row of micro tiles.
++    //
++    sliceBits = static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples;
++
++    rowBits   = (pitch / MicroTileWidth) * microTileBits; // product is formed in 32 bits, then widened
++
++    //
++    // Extract the slice index.
++    //
++    sliceIndex = static_cast<UINT_32>(bitAddr / sliceBits);
++    bitAddr -= sliceIndex * sliceBits;
++
++    //
++    // Extract the y coordinate of the micro tile.
++    //
++    microTileCoordY = static_cast<UINT_32>(bitAddr / rowBits) * MicroTileHeight;
++    bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits;
++
++    //
++    // Extract the x coordinate of the micro tile.
++    //
++    microTileCoordX = static_cast<UINT_32>(bitAddr / microTileBits) * MicroTileWidth;
++
++    //
++    // Compute the pixel offset within the micro tile.
++    //
++    pixelOffset = static_cast<UINT_32>(bitAddr % microTileBits);
++
++    //
++    // Extract pixel coordinates from the offset.
++    //
++    HwlComputePixelCoordFromOffset(pixelOffset,
++                                   bpp,
++                                   numSamples,
++                                   tileMode,
++                                   tileBase,
++                                   compBits,
++                                   &pixelCoordX,
++                                   &pixelCoordY,
++                                   &pixelCoordZ,
++                                   &pixelCoordS,
++                                   microTileType,
++                                   isDepthSampleOrder);
++
++    //
++    // Assemble final coordinates.
++    //
++    *pX     = microTileCoordX + pixelCoordX;
++    *pY     = microTileCoordY + pixelCoordY;
++    *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ;
++    *pSample = pixelCoordS;
++
++    if (microTileThickness > 1) // thick micro tiles always report sample 0
++    {
++        *pSample = 0;
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputePipeFromAddr
++*
++*   @brief
++*       Compute the pipe number from an address
++*
++*   @return
++*       Pipe number
++*
++***************************************************************************************************
++*/
++UINT_32 AddrLib::ComputePipeFromAddr(
++    UINT_64 addr,        ///< [in] address
++    UINT_32 numPipes     ///< [in] number of pipes
++    ) const
++{
++    // Log2(m_pipeInterleaveBytes) is the number of address bits below the pipe field
++    // ("group" and "pipe interleave" are just different terms for the same thing).
++    const UINT_32 interleaveShift = Log2(m_pipeInterleaveBytes);
++
++    // R600
++    // The LSBs of the address are arranged as follows:
++    //   bank | pipe | group
++    //
++    // R800
++    // The LSBs of the address are arranged as follows:
++    //   bank | bankInterleave | pipe | pipeInterleave
++    //
++    // Either way the pipe number is obtained by shifting off the interleave bits
++    // and masking the pipe bits.
++    //
++    // NOTE(review): the mask assumes numPipes is a power of two - confirm.
++
++    const UINT_32 shiftedAddr = static_cast<UINT_32>(addr >> interleaveShift);
++    const UINT_32 pipeMask    = numPipes - 1;
++
++    return shiftedAddr & pipeMask;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputePixelIndexWithinMicroTile
++*
++*   @brief
++*       Compute the pixel index inside a micro tile of surface
++*
++*   @return
++*       Pixel index
++*
++***************************************************************************************************
++*/
++UINT_32 AddrLib::ComputePixelIndexWithinMicroTile(
++    UINT_32         x,              ///< [in] x coord (only bits 0..2 are used)
++    UINT_32         y,              ///< [in] y coord (only bits 0..2 are used)
++    UINT_32         z,              ///< [in] slice/depth index (only bits 0..2 are used)
++    UINT_32         bpp,            ///< [in] bits per pixel
++    AddrTileMode    tileMode,       ///< [in] tile mode
++    AddrTileType    microTileType   ///< [in] pixel order in display/non-display mode
++    ) const
++{
++    UINT_32 pixelBit0 = 0;
++    UINT_32 pixelBit1 = 0;
++    UINT_32 pixelBit2 = 0;
++    UINT_32 pixelBit3 = 0;
++    UINT_32 pixelBit4 = 0;
++    UINT_32 pixelBit5 = 0;
++    UINT_32 pixelBit6 = 0;
++    UINT_32 pixelBit7 = 0;
++    UINT_32 pixelBit8 = 0;
++    UINT_32 pixelNumber;
++
++    UINT_32 x0 = _BIT(x, 0);
++    UINT_32 x1 = _BIT(x, 1);
++    UINT_32 x2 = _BIT(x, 2);
++    UINT_32 y0 = _BIT(y, 0);
++    UINT_32 y1 = _BIT(y, 1);
++    UINT_32 y2 = _BIT(y, 2);
++    UINT_32 z0 = _BIT(z, 0);
++    UINT_32 z1 = _BIT(z, 1);
++    UINT_32 z2 = _BIT(z, 2);
++
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++    // Compute the pixel number within the micro tile.
++    // Each pixelBitN selected below becomes bit N of the returned index.
++    if (microTileType != ADDR_THICK)
++    {
++        if (microTileType == ADDR_DISPLAYABLE)
++        {
++            switch (bpp)
++            {
++                case 8:
++                    pixelBit0 = x0;
++                    pixelBit1 = x1;
++                    pixelBit2 = x2;
++                    pixelBit3 = y1;
++                    pixelBit4 = y0;
++                    pixelBit5 = y2;
++                    break;
++                case 16:
++                    pixelBit0 = x0;
++                    pixelBit1 = x1;
++                    pixelBit2 = x2;
++                    pixelBit3 = y0;
++                    pixelBit4 = y1;
++                    pixelBit5 = y2;
++                    break;
++                case 32:
++                    pixelBit0 = x0;
++                    pixelBit1 = x1;
++                    pixelBit2 = y0;
++                    pixelBit3 = x2;
++                    pixelBit4 = y1;
++                    pixelBit5 = y2;
++                    break;
++                case 64:
++                    pixelBit0 = x0;
++                    pixelBit1 = y0;
++                    pixelBit2 = x1;
++                    pixelBit3 = x2;
++                    pixelBit4 = y1;
++                    pixelBit5 = y2;
++                    break;
++                case 128:
++                    pixelBit0 = y0;
++                    pixelBit1 = x0;
++                    pixelBit2 = x1;
++                    pixelBit3 = x2;
++                    pixelBit4 = y1;
++                    pixelBit5 = y2;
++                    break;
++                default:
++                    ADDR_ASSERT_ALWAYS();
++                    break;
++            }
++        }
++        else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
++        {
++            pixelBit0 = x0;
++            pixelBit1 = y0;
++            pixelBit2 = x1;
++            pixelBit3 = y1;
++            pixelBit4 = x2;
++            pixelBit5 = y2;
++        }
++        else if (microTileType == ADDR_ROTATED)
++        {
++            ADDR_ASSERT(thickness == 1);
++
++            switch (bpp)
++            {
++                case 8:
++                    pixelBit0 = y0;
++                    pixelBit1 = y1;
++                    pixelBit2 = y2;
++                    pixelBit3 = x1;
++                    pixelBit4 = x0;
++                    pixelBit5 = x2;
++                    break;
++                case 16:
++                    pixelBit0 = y0;
++                    pixelBit1 = y1;
++                    pixelBit2 = y2;
++                    pixelBit3 = x0;
++                    pixelBit4 = x1;
++                    pixelBit5 = x2;
++                    break;
++                case 32:
++                    pixelBit0 = y0;
++                    pixelBit1 = y1;
++                    pixelBit2 = x0;
++                    pixelBit3 = y2;
++                    pixelBit4 = x1;
++                    pixelBit5 = x2;
++                    break;
++                case 64:
++                    pixelBit0 = y0;
++                    pixelBit1 = x0;
++                    pixelBit2 = y1;
++                    pixelBit3 = x1;
++                    pixelBit4 = x2;
++                    pixelBit5 = y2;
++                    break;
++                default: // also reached for 128 bpp, which has no rotated mapping here
++                    ADDR_ASSERT_ALWAYS();
++                    break;
++            }
++        }
++
++        if (thickness > 1) // thick tile mode with a non-thick micro tile type: z goes on top
++        {
++            pixelBit6 = z0;
++            pixelBit7 = z1;
++        }
++    }
++    else // ADDR_THICK
++    {
++        ADDR_ASSERT(thickness > 1);
++
++        switch (bpp)
++        {
++            case 8:
++            case 16:
++                pixelBit0 = x0;
++                pixelBit1 = y0;
++                pixelBit2 = x1;
++                pixelBit3 = y1;
++                pixelBit4 = z0;
++                pixelBit5 = z1;
++                break;
++            case 32:
++                pixelBit0 = x0;
++                pixelBit1 = y0;
++                pixelBit2 = x1;
++                pixelBit3 = z0;
++                pixelBit4 = y1;
++                pixelBit5 = z1;
++                break;
++            case 64:
++            case 128:
++                pixelBit0 = y0;
++                pixelBit1 = x0;
++                pixelBit2 = z0;
++                pixelBit3 = x1;
++                pixelBit4 = y1;
++                pixelBit5 = z1;
++                break;
++            default:
++                ADDR_ASSERT_ALWAYS();
++                break;
++        }
++
++        pixelBit6 = x2;
++        pixelBit7 = y2;
++    }
++
++    if (thickness == 8) // applies to both branches above: z2 becomes bit 8
++    {
++        pixelBit8 = z2;
++    }
++
++    pixelNumber = ((pixelBit0     ) |
++                   (pixelBit1 << 1) |
++                   (pixelBit2 << 2) |
++                   (pixelBit3 << 3) |
++                   (pixelBit4 << 4) |
++                   (pixelBit5 << 5) |
++                   (pixelBit6 << 6) |
++                   (pixelBit7 << 7) |
++                   (pixelBit8 << 8));
++
++    return pixelNumber;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::AdjustPitchAlignment
++*
++*   @brief
++*       Adjusts pitch alignment for flipping surface
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrLib::AdjustPitchAlignment(
++    ADDR_SURFACE_FLAGS  flags,      ///< [in] Surface flags
++    UINT_32*            pPitchAlign ///< [in/out] Pointer to pitch alignment
++    ) const
++{
++    // Only display or overlay surfaces are adjusted.
++    if (!flags.display && !flags.overlay)
++    {
++        return;
++    }
++
++    // Display engine hardwires the lower 5 bits of GRPH_PITCH to zero, i.e. 32 pixel alignment.
++    UINT_32 aligned = PowTwoAlign(*pPitchAlign, 32);
++
++    // Display surfaces are additionally raised to at least m_minPitchAlignPixels.
++    *pPitchAlign = flags.display ? Max(m_minPitchAlignPixels, aligned) : aligned;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::PadDimensions
++*
++*   @brief
++*       Helper function to pad dimensions
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID AddrLib::PadDimensions(
++    AddrTileMode        tileMode,    ///< [in] tile mode
++    UINT_32             bpp,         ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,       ///< [in] surface flags
++    UINT_32             numSamples,  ///< [in] number of samples
++    ADDR_TILEINFO*      pTileInfo,   ///< [in/out] bank structure.
++    UINT_32             padDims,     ///< [in] Dimensions to pad, valid values 1,2,3 (0 becomes 3)
++    UINT_32             mipLevel,    ///< [in] MipLevel
++    UINT_32*            pPitch,      ///< [in/out] pitch in pixels
++    UINT_32             pitchAlign,  ///< [in] pitch alignment (need not be a power of two)
++    UINT_32*            pHeight,     ///< [in/out] height in pixels
++    UINT_32             heightAlign, ///< [in] height alignment
++    UINT_32*            pSlices,     ///< [in/out] number of slices
++    UINT_32             sliceAlign   ///< [in] number of slice alignment
++    ) const
++{
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++    ADDR_ASSERT(padDims <= 3);
++
++    //
++    // Override padding for mip levels
++    //
++    if (mipLevel > 0)
++    {
++        if (flags.cube)
++        {
++            // for cubemap, we only pad when client call with 6 faces as an identity
++            if (*pSlices > 1)
++            {
++                padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture
++            }
++            else
++            {
++                padDims = 2;
++            }
++        }
++    }
++
++    // A padDims of 0 is treated as a request to pad all three dimensions
++    if (padDims == 0)
++    {
++        padDims = 3;
++    }
++
++    if (IsPow2(pitchAlign))
++    {
++        *pPitch = PowTwoAlign((*pPitch), pitchAlign);
++    }
++    else // round up by divide/multiply; added to pass unit test, r600 linear mode does not align bpp to pow2
++    {
++        *pPitch += pitchAlign - 1;
++        *pPitch /= pitchAlign;
++        *pPitch *= pitchAlign;
++    }
++
++    if (padDims > 1)
++    {
++        *pHeight = PowTwoAlign((*pHeight), heightAlign); // NOTE(review): presumably needs pow2 heightAlign - confirm
++    }
++
++    if (padDims > 2 || thickness > 1)
++    {
++        // for cubemap single face, we do not pad slices.
++        // if we pad it, the slice number should be set to 6 and current mip level > 1
++        if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray))
++        {
++            *pSlices = NextPow2(*pSlices);
++        }
++
++        // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test)
++        if (thickness > 1)
++        {
++            *pSlices = PowTwoAlign((*pSlices), sliceAlign);
++        }
++
++    }
++
++    HwlPadDimensions(tileMode, // hardware-layer hook, called last with the already padded values
++                     bpp,
++                     flags,
++                     numSamples,
++                     pTileInfo,
++                     padDims,
++                     mipLevel,
++                     pPitch,
++                     pitchAlign,
++                     pHeight,
++                     heightAlign,
++                     pSlices,
++                     sliceAlign);
++}
++
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlPreHandleBaseLvl3xPitch
++*
++*   @brief
++*       Pre-handler of 3x pitch (96 bit) adjustment
++*
++*   @return
++*       Expected pitch
++***************************************************************************************************
++*/
++UINT_32 AddrLib::HwlPreHandleBaseLvl3xPitch(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
++    UINT_32                                 expPitch    ///< [in] pitch
++    ) const
++{
++    ADDR_ASSERT(pIn->width == expPitch);
++
++    //
++    // Only the base level of a linear-aligned surface with a 3x-expanded format
++    // arrives with its pitch pre-multiplied by 3.
++    //
++    const BOOL_32 preMultiplied = AddrElemLib::IsExpand3x(pIn->format) &&
++                                  (pIn->mipLevel == 0) &&
++                                  (pIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
++
++    // In that case retrieve the original pitch (divide by 3, then NextPow2) to get
++    // the correct miplevel size; otherwise the pitch is passed through unchanged.
++    return preMultiplied ? NextPow2(expPitch / 3) : expPitch;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlPostHandleBaseLvl3xPitch
++*
++*   @brief
++*       Post-handler of 3x pitch adjustment
++*
++*   @return
++*       Expected pitch
++***************************************************************************************************
++*/
++UINT_32 AddrLib::HwlPostHandleBaseLvl3xPitch(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
++    UINT_32                                 expPitch    ///< [in] pitch
++    ) const
++{
++    //
++    // Sub levels of a 96 bit surface use an element pitch of 32 bits, so only the
++    // base level of a linear-aligned, 3x-expanded surface is multiplied by 3 again.
++    //
++    if (!AddrElemLib::IsExpand3x(pIn->format) ||
++        (pIn->mipLevel != 0) ||
++        (pIn->tileMode != ADDR_TM_LINEAR_ALIGNED))
++    {
++        return expPitch;
++    }
++
++    return expPitch * 3;
++}
++
++
++/**
++***************************************************************************************************
++*   AddrLib::IsMacroTiled
++*
++*   @brief
++*       Check if the tile mode is macro tiled
++*
++*   @return
++*       TRUE if it is macro tiled (2D/2B/3D/3B)
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::IsMacroTiled(
++    AddrTileMode tileMode)  ///< [in] tile mode, used as index into m_modeFlags
++{
++   return m_modeFlags[tileMode].isMacro;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::IsMacro3dTiled
++*
++*   @brief
++*       Check if the tile mode is 3D macro tiled
++*
++*   @return
++*       TRUE if it is 3D macro tiled
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::IsMacro3dTiled(
++    AddrTileMode tileMode)  ///< [in] tile mode, used as index into m_modeFlags
++{
++    return m_modeFlags[tileMode].isMacro3d;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::IsMicroTiled
++*
++*   @brief
++*       Check if the tile mode is micro tiled
++*
++*   @return
++*       TRUE if micro tiled
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::IsMicroTiled(
++    AddrTileMode tileMode)  ///< [in] tile mode, used as index into m_modeFlags
++{
++    return m_modeFlags[tileMode].isMicro;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::IsLinear
++*
++*   @brief
++*       Check if the tile mode is linear
++*
++*   @return
++*       TRUE if linear
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::IsLinear(
++    AddrTileMode tileMode)  ///< [in] tile mode, used as index into m_modeFlags
++{
++    return m_modeFlags[tileMode].isLinear;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::IsPrtNoRotationTileMode
++*
++*   @brief
++*       Return TRUE if it is prt tile without rotation
++*   @note
++*       This function is only used by CI
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::IsPrtNoRotationTileMode(
++    AddrTileMode tileMode)  ///< [in] tile mode, used as index into m_modeFlags
++{
++    return m_modeFlags[tileMode].isPrtNoRotation;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::IsPrtTileMode
++*
++*   @brief
++*       Return TRUE if it is prt tile
++*   @note
++*       This function is only used by CI
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::IsPrtTileMode(
++    AddrTileMode tileMode)  ///< [in] tile mode, used as index into m_modeFlags
++{
++    return m_modeFlags[tileMode].isPrt;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::Bits2Number
++*
++*   @brief
++*       Concatenate a list of binary bits (given MSB first) into a number
++*
++*   @return
++*       The number combined with the array of bits (0 if bitNum is 0)
++***************************************************************************************************
++*/
++UINT_32 AddrLib::Bits2Number(
++    UINT_32 bitNum,     ///< [in] how many bits
++    ...)                ///< [in] variable bits value starting from MSB
++{
++    UINT_32 number = 0;
++    UINT_32 i;
++    va_list bits_ptr;
++
++    va_start(bits_ptr, bitNum);
++
++    for(i = 0; i < bitNum; i++)
++    {
++        // Shift before appending so the first (MSB) argument survives when bitNum is 32
++        number = (number << 1) | va_arg(bits_ptr, UINT_32);
++    }
++
++    // No trailing shift is needed: the last argument already sits in bit 0
++
++    va_end(bits_ptr);
++
++    return number;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeMipLevel
++*
++*   @brief
++*       Compute mipmap level width/height/slices
++*   @return
++*      N/A
++***************************************************************************************************
++*/
++VOID AddrLib::ComputeMipLevel(
++    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure
++    ) const
++{
++    // Block-compressed base levels are aligned first, then the HWL hook is invoked.
++    // The hook's BOOL_32 result is intentionally ignored by this function.
++
++    if (AddrElemLib::IsBlockCompressed(pIn->format))
++    {
++        if (pIn->mipLevel == 0)
++        {
++            // DXTn's level 0 must be multiple of 4
++            // But there are exceptions:
++            // 1. Internal surface creation in hostblt/vsblt/etc...
++            // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4
++            pIn->width = PowTwoAlign(pIn->width, 4);
++            pIn->height = PowTwoAlign(pIn->height, 4);
++        }
++    }
++
++    HwlComputeMipLevel(pIn);
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::DegradeBaseLevel
++*
++*   @brief
++*       Check if base level's tile mode can be degraded
++*   @return
++*       TRUE if degraded, also returns degraded tile mode (unchanged if not degraded)
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::DegradeBaseLevel(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure for surface info
++    AddrTileMode*                           pTileMode   ///< [out] Degraded tile mode (written only if degraded)
++    ) const
++{
++    BOOL_32 degraded = FALSE;
++    AddrTileMode tileMode = pIn->tileMode;
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++    if (m_configFlags.degradeBaseLevel) // This is a global setting
++    {
++        if (pIn->flags.degrade4Space        && // Degradation per surface
++            pIn->mipLevel == 0              && // Base level only
++            pIn->numSamples == 1            && // Single-sampled surfaces only
++            IsMacroTiled(tileMode))            // Only macro tiled modes are candidates
++        {
++            if (HwlDegradeBaseLevel(pIn))
++            {
++                *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
++                degraded = TRUE;
++            }
++            else if (thickness > 1)
++            {
++                // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
++                // thinner modes, we should re-evaluate whether the corresponding thinner modes
++                // need to be degraded. If so, we choose 1D thick mode instead.
++                tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp);
++                if (tileMode != pIn->tileMode)
++                {
++                    ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn; // Local copy, pIn is const
++                    input.tileMode = tileMode;
++                    if (HwlDegradeBaseLevel(&input))
++                    {
++                        *pTileMode = ADDR_TM_1D_TILED_THICK;
++                        degraded = TRUE;
++                    }
++                }
++            }
++        }
++    }
++
++    return degraded;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::DegradeLargeThickTile
++*
++*   @brief
++*       Check if the thickness needs to be reduced if a tile is too large
++*   @return
++*       The degraded tile mode (unchanged if not degraded)
++***************************************************************************************************
++*/
++AddrTileMode AddrLib::DegradeLargeThickTile(
++    AddrTileMode tileMode,  ///< [in] tile mode to check
++    UINT_32 bpp) const      ///< [in] bits per pixel
++{
++    // Override tilemode
++    // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size,
++    // it is better to just use THIN mode in this case
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++    if (thickness > 1 && m_configFlags.allowLargeThickTile == 0)
++    {
++        UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3); // tile size in bytes
++
++        if (tileSize > m_rowSize)
++        {
++            switch (tileMode)
++            {
++                case ADDR_TM_2D_TILED_XTHICK:
++                    if ((tileSize >> 1) <= m_rowSize) // half the size fits: THICK is enough
++                    {
++                        tileMode = ADDR_TM_2D_TILED_THICK;
++                        break;
++                    }
++                    // else fall through
++                case ADDR_TM_2D_TILED_THICK:
++                    tileMode    = ADDR_TM_2D_TILED_THIN1;
++                    break;
++
++                case ADDR_TM_3D_TILED_XTHICK:
++                    if ((tileSize >> 1) <= m_rowSize) // half the size fits: THICK is enough
++                    {
++                        tileMode = ADDR_TM_3D_TILED_THICK;
++                        break;
++                    }
++                    // else fall through
++                case ADDR_TM_3D_TILED_THICK:
++                    tileMode    = ADDR_TM_3D_TILED_THIN1;
++                    break;
++
++                case ADDR_TM_PRT_TILED_THICK:
++                    tileMode    = ADDR_TM_PRT_TILED_THIN1;
++                    break;
++
++                case ADDR_TM_PRT_2D_TILED_THICK:
++                    tileMode    = ADDR_TM_PRT_2D_TILED_THIN1;
++                    break;
++
++                case ADDR_TM_PRT_3D_TILED_THICK:
++                    tileMode    = ADDR_TM_PRT_3D_TILED_THIN1;
++                    break;
++
++                default:
++                    break;
++            }
++        }
++    }
++
++    return tileMode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::PostComputeMipLevel
++*   @brief
++*       Compute MipLevel info (including level 0) after surface adjustment
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::PostComputeMipLevel(
++    ADDR_COMPUTE_SURFACE_INFO_INPUT*    pIn,   ///< [in/out] Input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut   ///< [out] Output structure (not referenced here)
++    ) const
++{
++    // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is
++    // required by CFX  for Hw Compatibility between NI and SI. Otherwise it is only needed for
++    // mipLevel > 0. Any h/w has different requirement should implement its own virtual function
++
++    if (pIn->flags.pow2Pad)
++    {
++        pIn->width      = NextPow2(pIn->width);
++        pIn->height     = NextPow2(pIn->height);
++        pIn->numSlices  = NextPow2(pIn->numSlices);
++    }
++    else if (pIn->mipLevel > 0)
++    {
++        pIn->width      = NextPow2(pIn->width);
++        pIn->height     = NextPow2(pIn->height);
++
++        if (!pIn->flags.cube)
++        {
++            pIn->numSlices = NextPow2(pIn->numSlices);
++        }
++
++        // for cubemap, numSlices is left unchanged here
++    }
++
++    return ADDR_OK; // this function has no failure path
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlSetupTileCfg
++*
++*   @brief
++*       Map tile index to tile setting.
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::HwlSetupTileCfg(
++    INT_32          index,            ///< [in] Tile index
++    INT_32          macroModeIndex,   ///< [in] Index in macro tile mode table(CI)
++    ADDR_TILEINFO*  pInfo,            ///< [out] Tile Info
++    AddrTileMode*   pMode,            ///< [out] Tile mode
++    AddrTileType*   pType             ///< [out] Tile type
++    ) const
++{
++    return ADDR_NOTSUPPORTED; // all arguments are ignored and no output is written here
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::HwlGetPipes
++*
++*   @brief
++*       Get number pipes
++*   @return
++*       num pipes
++***************************************************************************************************
++*/
++UINT_32 AddrLib::HwlGetPipes(
++    const ADDR_TILEINFO* pTileInfo    ///< [in] Tile info
++    ) const
++{
++    //pTileInfo can be NULL when asic is 6xx and 8xx.
++    return m_pipes; // pTileInfo is not dereferenced here
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputeQbStereoInfo
++*
++*   @brief
++*       Get quad buffer stereo information
++*   @return
++*       TRUE if pOut->pStereoInfo is non-NULL and was filled in, FALSE otherwise
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::ComputeQbStereoInfo(
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [in/out] updated pOut+pStereoInfo
++    ) const
++{
++    BOOL_32 success = FALSE;
++
++    if (pOut->pStereoInfo)
++    {
++        ADDR_ASSERT(pOut->bpp >= 8);
++        ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0);
++
++        // Save original height (before it is doubled below)
++        pOut->pStereoInfo->eyeHeight = pOut->height;
++
++        // Right offset is the surface size before it is doubled below
++        pOut->pStereoInfo->rightOffset = static_cast<UINT_32>(pOut->surfSize);
++
++        pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut);
++        // Double height
++        pOut->height <<= 1;
++        pOut->pixelHeight <<= 1;
++
++        // Double size
++        pOut->surfSize <<= 1;
++
++        // Right start address meets the base align since it is guaranteed by AddrLib
++
++        // 1D surface on SI may break this rule, but we can force it to meet by checking .qbStereo.
++        success = TRUE;
++    }
++
++    return success;
++}
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++//                               Element lib
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++
++/**
++***************************************************************************************************
++*   AddrLib::Flt32ToDepthPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a depth/stencil pixel value
++*   @return
++*       ADDR_OK, or ADDR_PARAMSIZEMISMATCH if size checking is enabled and a size field is wrong
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::Flt32ToDepthPixel(
++    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,     ///< [in] format and components to convert
++    ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const   ///< [out] pixel, depth/stencil base and bits
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) ||
++            (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        GetElemLib()->Flt32ToDepthPixel(pIn->format,
++                                        pIn->comps,
++                                        pOut->pPixel);
++        UINT_32 depthBase = 0;
++        UINT_32 stencilBase = 0;
++        UINT_32 depthBits = 0;
++        UINT_32 stencilBits = 0;
++
++        switch (pIn->format)
++        {
++            case ADDR_DEPTH_16:
++                depthBits = 16;
++                break;
++            case ADDR_DEPTH_X8_24:
++            case ADDR_DEPTH_8_24:
++            case ADDR_DEPTH_X8_24_FLOAT:
++            case ADDR_DEPTH_8_24_FLOAT:
++                depthBase = 8;
++                depthBits = 24;
++                stencilBits = 8;
++                break;
++            case ADDR_DEPTH_32_FLOAT:
++                depthBits = 32;
++                break;
++            case ADDR_DEPTH_X24_8_32_FLOAT:
++                depthBase = 8;
++                depthBits = 32;
++                stencilBits = 8;
++                break;
++            default: // other formats report zero bases and zero bit counts
++                break;
++        }
++
++        // Overwrite base since R800 has no "tileBase"
++        if (GetElemLib()->IsDepthStencilTilePlanar() == FALSE)
++        {
++            depthBase = 0;
++            stencilBase = 0;
++        }
++
++        depthBase *= 64;
++        stencilBase *= 64;
++
++        pOut->stencilBase = stencilBase;
++        pOut->depthBase = depthBase;
++        pOut->depthBits = depthBits;
++        pOut->stencilBits = stencilBits;
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::Flt32ToColorPixel
++*
++*   @brief
++*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
++*   @return
++*       ADDR_OK, or ADDR_PARAMSIZEMISMATCH if size checking is enabled and a size field is wrong
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::Flt32ToColorPixel(
++    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,     ///< [in] format, surfNum, surfSwap and comps
++    ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const   ///< [out] pPixel receives the converted pixel
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) ||
++            (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT)))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH;
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        GetElemLib()->Flt32ToColorPixel(pIn->format,
++                                        pIn->surfNum,
++                                        pIn->surfSwap,
++                                        pIn->comps,
++                                        pOut->pPixel);
++    }
++
++    return returnCode;
++}
++
++
++/**
++***************************************************************************************************
++*   AddrLib::GetExportNorm
++*
++*   @brief
++*       Check whether one format can use EXPORT_NORM
++*   @return
++*       TRUE if EXPORT_NORM can be used; FALSE otherwise (including on input size mismatch)
++***************************************************************************************************
++*/
++BOOL_32 AddrLib::GetExportNorm(
++    const ELEM_GETEXPORTNORM_INPUT* pIn) const  ///< [in] format, num and swap are read
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    BOOL_32 enabled = FALSE;
++
++    if (GetFillSizeFieldsFlags() == TRUE)
++    {
++        if (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT))
++        {
++            returnCode = ADDR_PARAMSIZEMISMATCH; // only used locally; caller just sees FALSE
++        }
++    }
++
++    if (returnCode == ADDR_OK)
++    {
++        enabled = GetElemLib()->PixGetExportNorm(pIn->format,
++                                                 pIn->num,
++                                                 pIn->swap);
++    }
++
++    return enabled;
++}
++
++/**
++***************************************************************************************************
++*   AddrLib::ComputePrtInfo
++*
++*   @brief
++*       Compute prt surface related info
++*
++*   @return
++*       ADDR_OK, or ADDR_INVALIDPARAMS if bpp is below 8 or is 24, 48 or 96
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE AddrLib::ComputePrtInfo(
++    const ADDR_PRT_INFO_INPUT*  pIn,            ///< [in] format, numFrags, base mip height/depth
++    ADDR_PRT_INFO_OUTPUT*       pOut) const     ///< [out] prtTileWidth and prtTileHeight
++{
++    ADDR_ASSERT(pOut != NULL);
++
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    UINT_32     expandX = 1;
++    UINT_32     expandY = 1;
++    AddrElemMode elemMode;
++
++    UINT_32     bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
++                                                &elemMode,
++                                                &expandX,
++                                                &expandY);
++
++    if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96 )
++    {
++        returnCode = ADDR_INVALIDPARAMS;
++    }
++
++    UINT_32     numFrags = pIn->numFrags;
++    ADDR_ASSERT(numFrags <= 8);
++
++    UINT_32     tileWidth = 0;
++    UINT_32     tileHeight = 0;
++    if (returnCode == ADDR_OK)
++    {
++        // 2D or 3D texture (base mip height or depth greater than 1)
++        if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1)
++        {
++            if (bpp == 8)
++            {
++                tileWidth = 256;
++                tileHeight = 256;
++            }
++            else if (bpp == 16)
++            {
++                tileWidth = 256;
++                tileHeight = 128;
++            }
++            else if (bpp == 32)
++            {
++                tileWidth = 128;
++                tileHeight = 128;
++            }
++            else if (bpp == 64)
++            {
++                // assume it is BC1/4
++                tileWidth = 512;
++                tileHeight = 256;
++
++                if (elemMode == ADDR_UNCOMPRESSED)
++                {
++                    tileWidth = 128;
++                    tileHeight = 64;
++                }
++            }
++            else if (bpp == 128)
++            {
++                // assume it is BC2/3/5/6H/7
++                tileWidth = 256;
++                tileHeight = 256;
++
++                if (elemMode == ADDR_UNCOMPRESSED)
++                {
++                    tileWidth = 64;
++                    tileHeight = 64;
++                }
++            }
++
++            if (numFrags == 2) // other numFrags values (not 2, 4 or 8) leave the tile size as is
++            {
++                tileWidth = tileWidth / 2;
++            }
++            else if (numFrags == 4)
++            {
++                tileWidth = tileWidth / 2;
++                tileHeight = tileHeight / 2;
++            }
++            else if (numFrags == 8)
++            {
++                tileWidth = tileWidth / 4;
++                tileHeight = tileHeight / 2;
++            }
++        }
++        else    // 1d
++        {
++            tileHeight = 1;
++            if (bpp == 8)
++            {
++                tileWidth = 65536;
++            }
++            else if (bpp == 16)
++            {
++                tileWidth = 32768;
++            }
++            else if (bpp == 32)
++            {
++                tileWidth = 16384;
++            }
++            else if (bpp == 64)
++            {
++                tileWidth = 8192;
++            }
++            else if (bpp == 128)
++            {
++                tileWidth = 4096;
++            }
++        }
++    }
++
++    pOut->prtTileWidth = tileWidth;     // both are 0 when the format was rejected above
++    pOut->prtTileHeight = tileHeight;
++
++    return returnCode;
++}
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.h b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.h
+new file mode 100644
+index 0000000..43c55ff
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrlib.h
+@@ -0,0 +1,695 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrlib.h
++* @brief Contains the AddrLib base class definition.
++***************************************************************************************************
++*/
++
++#ifndef __ADDR_LIB_H__
++#define __ADDR_LIB_H__
++
++
++#include "addrinterface.h"
++#include "addrobject.h"
++#include "addrelemlib.h"
++
++#if BRAHMA_BUILD
++#include "amdgpu_id.h"
++#else
++#include "atiid.h"
++#endif
++
++#ifndef CIASICIDGFXENGINE_R600
++#define CIASICIDGFXENGINE_R600 0x00000006
++#endif
++
++#ifndef CIASICIDGFXENGINE_R800
++#define CIASICIDGFXENGINE_R800 0x00000008
++#endif
++
++#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
++#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
++#endif
++
++#ifndef CIASICIDGFXENGINE_SEAISLAND
++#define CIASICIDGFXENGINE_SEAISLAND 0x0000000B
++#endif
++/**
++***************************************************************************************************
++* @brief Neutral enums that define pipeinterleave
++***************************************************************************************************
++*/
++enum AddrPipeInterleave
++{
++    ADDR_PIPEINTERLEAVE_256B = 256,
++    ADDR_PIPEINTERLEAVE_512B = 512,
++};
++
++/**
++***************************************************************************************************
++* @brief Neutral enums that define DRAM row size
++***************************************************************************************************
++*/
++enum AddrRowSize
++{
++    ADDR_ROWSIZE_1KB = 1024,
++    ADDR_ROWSIZE_2KB = 2048,
++    ADDR_ROWSIZE_4KB = 4096,
++    ADDR_ROWSIZE_8KB = 8192,
++};
++
++/**
++***************************************************************************************************
++* @brief Neutral enums that define bank interleave
++***************************************************************************************************
++*/
++enum AddrBankInterleave
++{
++    ADDR_BANKINTERLEAVE_1 = 1,
++    ADDR_BANKINTERLEAVE_2 = 2,
++    ADDR_BANKINTERLEAVE_4 = 4,
++    ADDR_BANKINTERLEAVE_8 = 8,
++};
++
++/**
++***************************************************************************************************
++* @brief Neutral enums that define MGPU chip tile size
++***************************************************************************************************
++*/
++enum AddrChipTileSize
++{
++    ADDR_CHIPTILESIZE_16 = 16,
++    ADDR_CHIPTILESIZE_32 = 32,
++    ADDR_CHIPTILESIZE_64 = 64,
++    ADDR_CHIPTILESIZE_128 = 128,
++};
++
++/**
++***************************************************************************************************
++* @brief Neutral enums that define shader engine tile size
++***************************************************************************************************
++*/
++enum AddrEngTileSize
++{
++    ADDR_SE_TILESIZE_16 = 16,
++    ADDR_SE_TILESIZE_32 = 32,
++};
++
++/**
++***************************************************************************************************
++* @brief Neutral enums that define bank swap size
++***************************************************************************************************
++*/
++enum AddrBankSwapSize
++{
++    ADDR_BANKSWAP_128B = 128,
++    ADDR_BANKSWAP_256B = 256,
++    ADDR_BANKSWAP_512B = 512,
++    ADDR_BANKSWAP_1KB = 1024,
++};
++
++/**
++***************************************************************************************************
++* @brief Neutral enums that define sample split size
++***************************************************************************************************
++*/
++enum AddrSampleSplitSize
++{
++    ADDR_SAMPLESPLIT_1KB = 1024,
++    ADDR_SAMPLESPLIT_2KB = 2048,
++    ADDR_SAMPLESPLIT_4KB = 4096,
++    ADDR_SAMPLESPLIT_8KB = 8192,
++};
++
++/**
++***************************************************************************************************
++* @brief Flags for AddrTileMode
++***************************************************************************************************
++*/
++struct AddrTileModeFlags
++{
++    UINT_32 thickness       : 4;
++    UINT_32 isLinear        : 1;
++    UINT_32 isMicro         : 1;
++    UINT_32 isMacro         : 1;
++    UINT_32 isMacro3d       : 1;
++    UINT_32 isPrt           : 1;
++    UINT_32 isPrtNoRotation : 1;
++    UINT_32 isBankSwapped   : 1;
++};
++
++/**
++***************************************************************************************************
++* @brief This class contains asic independent address lib functionalities
++***************************************************************************************************
++*/
++class AddrLib : public AddrObject
++{
++public:
++    virtual ~AddrLib();
++
++    static ADDR_E_RETURNCODE Create(
++        const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut);
++
++    /// Pair of Create
++    VOID Destroy()
++    {
++        delete this;
++    }
++
++    static AddrLib* GetAddrLib(
++        ADDR_HANDLE hLib);
++
++    /// Returns AddrLib version (from compiled binary instead include file)
++    UINT_32 GetVersion()
++    {
++        return m_version;
++    }
++
++    /// Returns asic chip family name defined by AddrLib
++    AddrChipFamily GetAddrChipFamily()
++    {
++        return m_chipFamily;
++    }
++
++    /// Returns tileIndex support
++    BOOL_32 UseTileIndex(INT_32 index) const
++    {
++        return m_configFlags.useTileIndex && (index != TileIndexInvalid);
++    }
++
++    /// Returns combined swizzle support
++    BOOL_32 UseCombinedSwizzle() const
++    {
++        return m_configFlags.useCombinedSwizzle;
++    }
++
++    //
++    // Interface stubs
++    //
++    ADDR_E_RETURNCODE ComputeSurfaceInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
++        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
++        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT*  pIn,
++        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeSliceTileSwizzle(
++        const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,
++        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ExtractBankPipeSwizzle(
++        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
++        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE CombineBankPipeSwizzle(
++        const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
++        ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeBaseSwizzle(
++        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
++        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeFmaskInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT*  pIn,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
++
++    ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
++        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*  pIn,
++        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
++        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,
++        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ConvertTileInfoToHW(
++        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
++        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ConvertTileIndex(
++        const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
++        ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ConvertTileIndex1(
++        const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,
++        ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE GetTileIndex(
++        const ADDR_GET_TILEINDEX_INPUT* pIn,
++        ADDR_GET_TILEINDEX_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeHtileInfo(
++        const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeCmaskInfo(
++        const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn,
++        ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeDccInfo(
++        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
++        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
++        const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*  pIn,
++        ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
++        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*  pIn,
++        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
++        const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*  pIn,
++        ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
++        const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*  pIn,
++        ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE ComputePrtInfo(
++        const ADDR_PRT_INFO_INPUT*  pIn,
++        ADDR_PRT_INFO_OUTPUT*       pOut) const;
++
++    ADDR_E_RETURNCODE Flt32ToDepthPixel(
++        const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
++        ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE Flt32ToColorPixel(
++        const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
++        ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const;
++
++    BOOL_32 GetExportNorm(
++        const ELEM_GETEXPORTNORM_INPUT* pIn) const;
++
++protected:
++    AddrLib();  // Constructor is protected
++    AddrLib(const AddrClient* pClient);
++
++    /// Pure Virtual function for Hwl computing surface info
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl computing surface address from coord
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
++        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl computing surface coord from address
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
++        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl computing surface tile swizzle
++    virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
++        const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
++        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b
++    virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
++        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
++        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl combining bank/pipe swizzle
++    virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
++        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO*  pTileInfo,
++        UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0;
++
++    /// Pure Virtual function for Hwl computing base swizzle
++    virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
++        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
++        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl computing HTILE base align
++    virtual UINT_32 HwlComputeHtileBaseAlign(
++        BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0;
++
++    /// Pure Virtual function for Hwl computing HTILE bpp
++    virtual UINT_32 HwlComputeHtileBpp(
++        BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0;
++
++    /// Pure Virtual function for Hwl computing HTILE bytes
++    virtual UINT_64 HwlComputeHtileBytes(
++        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
++        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0;
++
++    /// Pure Virtual function for Hwl computing FMASK info
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0;
++
++    /// Pure Virtual function for Hwl FMASK address from coord
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
++        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl FMASK coord from address
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
++        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl convert tile info from real value to HW value
++    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
++        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
++        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0;
++
++    /// Pure Virtual function for Hwl compute mipmap info
++    virtual BOOL_32 HwlComputeMipLevel(
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
++
++    /// Pure Virtual function for Hwl compute max cmask blockMax value
++    virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0;
++
++    /// Pure Virtual function for Hwl compute fmask bits
++    virtual UINT_32 HwlComputeFmaskBits(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++        UINT_32* pNumSamples) const = 0;
++
++    /// Virtual (not pure) function to get index; HWLs that do not override it get ADDR_NOTSUPPORTED
++    virtual ADDR_E_RETURNCODE HwlGetTileIndex(
++        const ADDR_GET_TILEINDEX_INPUT* pIn,
++        ADDR_GET_TILEINDEX_OUTPUT*      pOut) const
++    {
++        return ADDR_NOTSUPPORTED;
++    }
++
++    /// Virtual function for Hwl to compute Dcc info; the default returns ADDR_NOTSUPPORTED
++    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
++        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
++        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const
++    {
++        return ADDR_NOTSUPPORTED;
++    }
++
++    /// Virtual function to get cmask address for tc compatible cmask; default: ADDR_NOTSUPPORTED
++    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
++        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const
++    {
++        return ADDR_NOTSUPPORTED;
++    }
++    // Compute attributes
++
++    // HTILE
++    UINT_32    ComputeHtileInfo(
++        ADDR_HTILE_FLAGS flags,
++        UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices,
++        BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
++        ADDR_TILEINFO*  pTileInfo,
++        UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes,
++        UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL,
++        UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const;
++
++    // CMASK
++    ADDR_E_RETURNCODE ComputeCmaskInfo(
++        ADDR_CMASK_FLAGS flags,
++        UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear,
++        ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes,
++        UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL,
++        UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const;
++
++    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
++        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
++        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
++
++    // CMASK & HTILE addressing
++    virtual UINT_64 HwlComputeXmaskAddrFromCoord(
++        UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice,
++        UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8,
++        BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo,
++        UINT_32* bitPosition) const;
++
++    virtual VOID HwlComputeXmaskCoordFromAddr(
++        UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
++        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
++        ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
++
++    // Surface mipmap
++    VOID    ComputeMipLevel(
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
++
++    /// Pure Virtual function for Hwl checking degrade for base level
++    virtual BOOL_32 HwlDegradeBaseLevel(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
++
++    virtual BOOL_32 HwlOverrideTileMode(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        AddrTileMode* pTileMode,
++        AddrTileType* pTileType) const
++    {
++        // Default when the hwl layer has no override support: FALSE means "not overridden"
++        return FALSE;
++    }
++
++    AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const;
++
++    VOID PadDimensions(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
++        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
++        UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
++        UINT_32* pSlices, UINT_32 sliceAlign) const;
++
++    virtual VOID HwlPadDimensions(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
++        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
++        UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
++        UINT_32* pSlices, UINT_32 sliceAlign) const
++    {   // Default implementation is empty: no output is modified unless an HWL overrides this
++    }
++
++    //
++    // Addressing shared for linear/1D tiling
++    //
++    UINT_64 ComputeSurfaceAddrFromCoordLinear(
++        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
++        UINT_32* pBitPosition) const;
++
++    VOID    ComputeSurfaceCoordFromAddrLinear(
++        UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp,
++        UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
++
++    VOID    ComputeSurfaceCoordFromAddrMicroTiled(
++        UINT_64 addr, UINT_32 bitPosition,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
++        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
++
++    UINT_32 ComputePixelIndexWithinMicroTile(
++        UINT_32 x, UINT_32 y, UINT_32 z,
++        UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const;
++
++    /// Pure Virtual function for Hwl computing coord from offset inside micro tile
++    virtual VOID HwlComputePixelCoordFromOffset(
++        UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
++        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
++        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0;
++
++    //
++    // Addressing shared by all
++    //
++    virtual UINT_32 HwlGetPipes(
++        const ADDR_TILEINFO* pTileInfo) const;
++
++    UINT_32 ComputePipeFromAddr(
++        UINT_64 addr, UINT_32 numPipes) const;
++
++    /// Pure Virtual function for Hwl computing pipe from coord
++    virtual UINT_32 ComputePipeFromCoord(
++        UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode,
++        UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0;
++
++    /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile
++    virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
++        UINT_32 pipe, UINT_32 x) const = 0;
++
++    //
++    // Initialization
++    //
++    /// Pure Virtual function for Hwl computing internal global parameters from h/w registers
++    virtual BOOL_32 HwlInitGlobalParams(
++        const ADDR_CREATE_INPUT* pCreateIn) = 0;
++
++    /// Pure Virtual function for Hwl converting chip family
++    virtual AddrChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0;
++
++    //
++    // Misc helper
++    //
++    static const AddrTileModeFlags m_modeFlags[ADDR_TM_COUNT];
++
++    static UINT_32 ComputeSurfaceThickness(
++        AddrTileMode tileMode);
++
++    // Checking tile mode
++    static BOOL_32 IsMacroTiled(AddrTileMode tileMode);
++    static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode);
++    static BOOL_32 IsLinear(AddrTileMode tileMode);
++    static BOOL_32 IsMicroTiled(AddrTileMode tileMode);
++    static BOOL_32 IsPrtTileMode(AddrTileMode tileMode);
++    static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode);
++
++    static UINT_32 Bits2Number(UINT_32 bitNum,...);
++
++    static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags)
++    {   // A non-zero numFrags is returned as is; otherwise the result is Max(1u, numSamples)
++        return numFrags != 0 ? numFrags : Max(1u, numSamples);
++    }
++
++    /// Returns the AddrElemLib pointer stored in m_pElemLib
++    AddrElemLib* GetElemLib() const
++    {
++        return m_pElemLib;
++    }
++
++    /// Returns TRUE if tile info is needed, i.e. m_configFlags.ignoreTileInfo is not set
++    BOOL_32 UseTileInfo() const
++    {
++        return !m_configFlags.ignoreTileInfo;
++    }
++
++    /// Returns the fillSizeFields field of m_configFlags
++    UINT_32 GetFillSizeFieldsFlags() const
++    {
++        return m_configFlags.fillSizeFields;
++    }
++
++    /// Adjusts pitch alignment for flipping surface
++    VOID    AdjustPitchAlignment(
++        ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const;
++
++    /// Overwrite tile config according to tile index
++    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
++        INT_32 index, INT_32 macroModeIndex,
++        ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const;
++
++    /// Overwrite macro tile config according to tile index; default returns TileIndexNoMacroIndex
++    virtual INT_32 HwlComputeMacroModeIndex(
++        INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
++        ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL
++        ) const
++    {
++        return TileIndexNoMacroIndex;
++    }
++
++    /// Pre-handler of 3x pitch (96 bit) adjustment
++    virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
++    /// Post-handler of 3x pitch adjustment
++    virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
++    /// Check miplevel after surface adjustment
++    ADDR_E_RETURNCODE PostComputeMipLevel(
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    /// Quad buffer stereo support, has its implementation in ind. layer
++    virtual BOOL_32 ComputeQbStereoInfo(
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    /// Pure virtual function to compute stereo bank swizzle for right eye
++    virtual UINT_32 HwlComputeQbStereoRightSwizzle(
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
++
++private:
++    // Disallow the copy constructor
++    AddrLib(const AddrLib& a);
++
++    // Disallow the assignment operator
++    AddrLib& operator=(const AddrLib& a);
++
++    VOID SetAddrChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
++
++    UINT_32 ComputeCmaskBaseAlign(
++        ADDR_CMASK_FLAGS flags, ADDR_TILEINFO*  pTileInfo) const;
++
++    UINT_64 ComputeCmaskBytes(
++        UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const;
++
++    //
++    // CMASK/HTILE shared methods
++    //
++    VOID    ComputeTileDataWidthAndHeight(
++        UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
++        UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
++
++    UINT_32 ComputeXmaskCoordYFromPipe(
++        UINT_32 pipe, UINT_32 x) const;
++
++    VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
++
++    BOOL_32 DegradeBaseLevel(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const;
++
++protected:
++    AddrLibClass        m_class;        ///< Store class type (HWL type)
++
++    AddrChipFamily      m_chipFamily;   ///< Chip family translated from the one in atiid.h
++
++    UINT_32             m_chipRevision; ///< Revision id from xxx_id.h
++
++    UINT_32             m_version;      ///< Current version
++
++    //
++    // Global parameters
++    //
++    ADDR_CONFIG_FLAGS   m_configFlags;  ///< Global configuration flags. Note this is setup by
++                                        ///  AddrLib instead of Client except forceLinearAligned
++
++    UINT_32             m_pipes;        ///< Number of pipes
++    UINT_32             m_banks;        ///< Number of banks
++                                        ///  For r800 this is MC_ARB_RAMCFG.NOOFBANK
++                                        ///  Keep it here to do default parameter calculation
++
++    UINT_32             m_pipeInterleaveBytes;
++                                        ///< Specifies the size of contiguous address space
++                                        ///  within each tiling pipe when making linear
++                                        ///  accesses. (Formerly Group Size)
++
++    UINT_32             m_rowSize;      ///< DRAM row size, in bytes
++
++    UINT_32             m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels
++    UINT_32             m_maxSamples;   ///< Max numSamples
++private:
++    AddrElemLib*        m_pElemLib;     ///< Element Lib pointer
++};
++
++AddrLib* AddrSIHwlInit  (const AddrClient* pClient);
++AddrLib* AddrCIHwlInit  (const AddrClient* pClient);
++
++#endif
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.cpp
+new file mode 100644
+index 0000000..863a252
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.cpp
+@@ -0,0 +1,246 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrobject.cpp
++* @brief Contains the AddrObject base class implementation.
++***************************************************************************************************
++*/
++
++#include "addrinterface.h"
++#include "addrobject.h"
++
++/**
++***************************************************************************************************
++*   AddrObject::AddrObject
++*
++*   @brief
++*       Default constructor: sets the client handle and the alloc/free/debugPrint callbacks to NULL.
++***************************************************************************************************
++*/
++AddrObject::AddrObject()
++{
++    m_client.handle = NULL;
++    m_client.callbacks.allocSysMem = NULL;
++    m_client.callbacks.freeSysMem = NULL;
++    m_client.callbacks.debugPrint = NULL;
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::AddrObject
++*
++*   @brief
++*       Constructor that copies the client data pointed to by pClient (dereferenced unchecked).
++***************************************************************************************************
++*/
++AddrObject::AddrObject(const AddrClient* pClient)
++{
++    m_client = *pClient;
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::~AddrObject
++*
++*   @brief
++*       Destructor for the AddrObject class; the body is empty.
++***************************************************************************************************
++*/
++AddrObject::~AddrObject()
++{
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::ClientAlloc
++*
++*   @brief
++*       Allocates objSize bytes via the client's allocSysMem callback; NULL if no callback is set.
++***************************************************************************************************
++*/
++VOID* AddrObject::ClientAlloc(
++    size_t             objSize,    ///< [in] Size to allocate (narrowed to UINT_32 below)
++    const AddrClient*  pClient)    ///< [in] Client pointer (dereferenced without a NULL check)
++{
++    VOID* pObjMem = NULL;
++
++    if (pClient->callbacks.allocSysMem != NULL)
++    {
++        ADDR_ALLOCSYSMEM_INPUT allocInput = {0};
++
++        allocInput.size        = sizeof(ADDR_ALLOCSYSMEM_INPUT);
++        allocInput.flags.value = 0;
++        allocInput.sizeInBytes = static_cast<UINT_32>(objSize); // NOTE(review): truncates > 32 bits
++        allocInput.hClient     = pClient->handle;
++
++        pObjMem = pClient->callbacks.allocSysMem(&allocInput);
++    }
++
++    return pObjMem;
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::AddrMalloc
++*
++*   @brief
++*       A wrapper of ClientAlloc that allocates with this object's own client data (m_client).
++***************************************************************************************************
++*/
++VOID* AddrObject::AddrMalloc(
++    size_t objSize) const   ///< [in] Size to allocate
++{
++    return ClientAlloc(objSize, &m_client);
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::ClientFree
++*
++*   @brief
++*       Frees pObjMem via the client's freeSysMem callback; no-op if callback or pObjMem is NULL.
++***************************************************************************************************
++*/
++VOID AddrObject::ClientFree(
++    VOID*              pObjMem,    ///< [in] User virtual address to free.
++    const AddrClient*  pClient)    ///< [in] Client pointer (dereferenced without a NULL check)
++{
++    if (pClient->callbacks.freeSysMem != NULL)
++    {
++        if (pObjMem != NULL)
++        {
++            ADDR_FREESYSMEM_INPUT freeInput = {0};
++
++            freeInput.size      = sizeof(ADDR_FREESYSMEM_INPUT);
++            freeInput.hClient   = pClient->handle;
++            freeInput.pVirtAddr = pObjMem;
++
++            pClient->callbacks.freeSysMem(&freeInput);
++        }
++    }
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::AddrFree
++*
++*   @brief
++*       A wrapper of ClientFree that frees with this object's own client data (m_client).
++***************************************************************************************************
++*/
++VOID AddrObject::AddrFree(
++    VOID* pObjMem) const                 ///< [in] User virtual address to free.
++{
++    ClientFree(pObjMem, &m_client);
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::operator new
++*
++*   @brief
++*       Allocates memory needed for AddrObject object through ClientAlloc (with AddrClient pointer)
++*
++*   @return
++*       Returns NULL if unsuccessful (including when the client has no allocSysMem callback).
++***************************************************************************************************
++*/
++VOID* AddrObject::operator new(
++    size_t             objSize,    ///< [in] Size to allocate
++    const AddrClient*  pClient)    ///< [in] Client pointer
++{
++    return ClientAlloc(objSize, pClient);
++}
++
++
++/**
++***************************************************************************************************
++*   AddrObject::operator delete
++*
++*   @brief
++*       Frees AddrObject object memory through ClientFree with an explicitly supplied client.
++***************************************************************************************************
++*/
++VOID AddrObject::operator delete(
++    VOID* pObjMem,              ///< [in] User virtual address to free.
++    const AddrClient* pClient)  ///< [in] Client pointer (same kind as taken by operator new)
++{
++    ClientFree(pObjMem, pClient);
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::operator delete
++*
++*   @brief
++*       Frees AddrObject object memory using the client data stored inside the object (m_client).
++***************************************************************************************************
++*/
++VOID AddrObject::operator delete(
++    VOID* pObjMem)                  ///< [in] User virtual address to free.
++{
++    AddrObject* pObj = static_cast<AddrObject*>(pObjMem); // memory is treated as an AddrObject
++    ClientFree(pObjMem, &pObj->m_client); // NOTE(review): reads m_client from the freed block
++}
++
++/**
++***************************************************************************************************
++*   AddrObject::DebugPrint
++*
++*   @brief
++*       Passes a debug string and its variadic arguments to the client's debugPrint callback.
++*
++*   @return
++*       N/A (does nothing unless DEBUG is set and the callback is non-NULL)
++***************************************************************************************************
++*/
++VOID AddrObject::DebugPrint(
++    const CHAR* pDebugString,     ///< [in] Debug string
++    ...) const
++{
++#if DEBUG
++    if (m_client.callbacks.debugPrint != NULL)
++    {
++        va_list ap;
++
++        va_start(ap, pDebugString);
++
++        ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};
++
++        debugPrintInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
++        debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
++        debugPrintInput.hClient      = m_client.handle;
++        va_copy(debugPrintInput.ap, ap);
++
++        m_client.callbacks.debugPrint(&debugPrintInput);
++        va_end(debugPrintInput.ap); // every va_copy must be paired with its own va_end
++        va_end(ap);
++    }
++#endif
++}
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.h b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.h
+new file mode 100644
+index 0000000..3540088
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/core/addrobject.h
+@@ -0,0 +1,89 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  addrobject.h
++* @brief Contains the AddrObject base class definition.
++***************************************************************************************************
++*/
++
++#ifndef __ADDR_OBJECT_H__
++#define __ADDR_OBJECT_H__
++
++#include "addrtypes.h"
++#include "addrcommon.h"
++
++/**
++***************************************************************************************************
++* @brief This structure contains client specific data
++***************************************************************************************************
++*/
++struct AddrClient
++{
++    ADDR_CLIENT_HANDLE  handle;     ///< Client handle, passed to callbacks as hClient
++    ADDR_CALLBACKS      callbacks;  ///< Client callbacks (allocSysMem, freeSysMem, debugPrint)
++};
++/**
++***************************************************************************************************
++* @brief This class is the base class for all ADDR class objects.
++***************************************************************************************************
++*/
++class AddrObject
++{
++public:
++    AddrObject();                               ///< Clears the client data
++    AddrObject(const AddrClient* pClient);      ///< Copies the client data from pClient
++    virtual ~AddrObject();
++
++    VOID* operator new(size_t size, const AddrClient* pClient);   ///< Allocates via client callback
++    VOID  operator delete(VOID* pObj, const AddrClient* pClient); ///< Frees via the given client
++    VOID  operator delete(VOID* pObj);          ///< Frees via the client stored in the object
++    VOID* AddrMalloc(size_t size) const;        ///< Allocates via m_client
++    VOID  AddrFree(VOID* pObj) const;           ///< Frees via m_client
++
++    VOID DebugPrint(
++        const CHAR* pDebugString,
++        ...) const;
++
++    const AddrClient* GetClient() const {return &m_client;} ///< Pointer to the stored client data
++
++protected:
++    AddrClient m_client;    ///< Client handle and callbacks used for alloc/free/debug print
++
++private:
++    static VOID* ClientAlloc(size_t size, const AddrClient* pClient);
++    static VOID  ClientFree(VOID* pObj, const AddrClient* pClient);
++
++    // Disallow the copy constructor (declared private)
++    AddrObject(const AddrObject& a);
++
++    // Disallow the assignment operator (declared private)
++    AddrObject& operator=(const AddrObject& a);
++};
++
++#endif
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/inc/chip/r800/si_gb_reg.h b/src/gallium/winsys/radeon/amdgpu/addrlib/inc/chip/r800/si_gb_reg.h
+new file mode 100644
+index 0000000..cf67f60
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/inc/chip/r800/si_gb_reg.h
+@@ -0,0 +1,155 @@
++#if !defined (__SI_GB_REG_H__)
++#define __SI_GB_REG_H__
++
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++//
++// Make sure the necessary endian defines are there.
++//
++#if defined(LITTLEENDIAN_CPU)
++#elif defined(BIGENDIAN_CPU)
++#else
++#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
++#endif
++
++/*
++ * GB_ADDR_CONFIG struct
++ */
++
++#if     defined(LITTLEENDIAN_CPU)
++
++     typedef struct _GB_ADDR_CONFIG_T { // LITTLEENDIAN_CPU layout; widths sum to 32, unnamed = padding
++          unsigned int num_pipes                      : 3;
++          unsigned int                                : 1;
++          unsigned int pipe_interleave_size           : 3;
++          unsigned int                                : 1;
++          unsigned int bank_interleave_size           : 3;
++          unsigned int                                : 1;
++          unsigned int num_shader_engines             : 2;
++          unsigned int                                : 2;
++          unsigned int shader_engine_tile_size        : 3;
++          unsigned int                                : 1;
++          unsigned int num_gpus                       : 3;
++          unsigned int                                : 1;
++          unsigned int multi_gpu_tile_size            : 2;
++          unsigned int                                : 2;
++          unsigned int row_size                       : 2;
++          unsigned int num_lower_pipes                : 1;
++          unsigned int                                : 1;
++     } GB_ADDR_CONFIG_T;
++
++#elif       defined(BIGENDIAN_CPU)
++
++     typedef struct _GB_ADDR_CONFIG_T { // BIGENDIAN_CPU layout: same fields in reverse order
++          unsigned int                                : 1;
++          unsigned int num_lower_pipes                : 1;
++          unsigned int row_size                       : 2;
++          unsigned int                                : 2;
++          unsigned int multi_gpu_tile_size            : 2;
++          unsigned int                                : 1;
++          unsigned int num_gpus                       : 3;
++          unsigned int                                : 1;
++          unsigned int shader_engine_tile_size        : 3;
++          unsigned int                                : 2;
++          unsigned int num_shader_engines             : 2;
++          unsigned int                                : 1;
++          unsigned int bank_interleave_size           : 3;
++          unsigned int                                : 1;
++          unsigned int pipe_interleave_size           : 3;
++          unsigned int                                : 1;
++          unsigned int num_pipes                      : 3;
++     } GB_ADDR_CONFIG_T;
++
++#endif
++
++typedef union {
++     unsigned int val : 32;     // all 32 bits as a single value
++     GB_ADDR_CONFIG_T f;        // the same storage viewed as named bitfields
++} GB_ADDR_CONFIG;
++
++#if       defined(LITTLEENDIAN_CPU)
++
++     typedef struct _GB_TILE_MODE_T { // LITTLEENDIAN_CPU layout; widths sum to 32, unnamed = padding
++          unsigned int micro_tile_mode                : 2;
++          unsigned int array_mode                     : 4;
++          unsigned int pipe_config                    : 5;
++          unsigned int tile_split                     : 3;
++          unsigned int bank_width                     : 2;
++          unsigned int bank_height                    : 2;
++          unsigned int macro_tile_aspect              : 2;
++          unsigned int num_banks                      : 2;
++          unsigned int micro_tile_mode_new            : 3;
++          unsigned int sample_split                   : 2;
++          unsigned int                                : 5;
++     } GB_TILE_MODE_T;
++
++     typedef struct _GB_MACROTILE_MODE_T { // LITTLEENDIAN_CPU layout; 8 named bits + 24 padding
++          unsigned int bank_width                     : 2;
++          unsigned int bank_height                    : 2;
++          unsigned int macro_tile_aspect              : 2;
++          unsigned int num_banks                      : 2;
++          unsigned int                                : 24;
++     } GB_MACROTILE_MODE_T;
++
++#elif          defined(BIGENDIAN_CPU)
++
++     typedef struct _GB_TILE_MODE_T {
++          unsigned int                                : 5;
++          unsigned int sample_split                   : 2;
++          unsigned int micro_tile_mode_new            : 3;
++          unsigned int num_banks                      : 2;
++          unsigned int macro_tile_aspect              : 2;
++          unsigned int bank_height                    : 2;
++          unsigned int bank_width                     : 2;
++          unsigned int tile_split                     : 3;
++          unsigned int pipe_config                    : 5;
++          unsigned int array_mode                     : 4;
++          unsigned int micro_tile_mode                : 2;
++     } GB_TILE_MODE_T;
++
++     typedef struct _GB_MACROTILE_MODE_T {
++          unsigned int                                : 24;
++          unsigned int num_banks                      : 2;
++          unsigned int macro_tile_aspect              : 2;
++          unsigned int bank_height                    : 2;
++          unsigned int bank_width                     : 2;
++     } GB_MACROTILE_MODE_T;
++
++#endif
++
++typedef union {                 // raw value and bit-field view overlay the same storage
++     unsigned int val : 32;     // the whole value as a single 32-bit field
++     GB_TILE_MODE_T f;          // per-field view; layout picked by the endian #if above
++} GB_TILE_MODE;
++
++typedef union {                 // raw value and bit-field view overlay the same storage
++     unsigned int val : 32;     // the whole value as a single 32-bit field
++     GB_MACROTILE_MODE_T f;     // per-field view; layout picked by the endian #if above
++} GB_MACROTILE_MODE;
++
++#endif
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/inc/lnx_common_defs.h b/src/gallium/winsys/radeon/amdgpu/addrlib/inc/lnx_common_defs.h
+new file mode 100644
+index 0000000..61540f4
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/inc/lnx_common_defs.h
+@@ -0,0 +1,129 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++#ifndef _lnx_common_defs_h_
++#define _lnx_common_defs_h_
++
++#if DBG
++#include <stdarg.h>                         // We do not have any choice: need variable
++                                            // number of parameters support for debug
++                                            // build.
++#endif                                      // #if DBG
++
++//
++// --------------  External functions from Linux kernel driver ----------------
++//
++// Note: The definitions/declarations below must match the original ones.
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++typedef unsigned long __ke_size_t;              // as it is defined in firegl_public.h
++typedef int           __kernel_ptrdiff_t;       // as it is defined in posix_types.h
++
++
++#if !defined(ATI_API_CALL)                  // a definition supplied earlier (e.g. by the build) takes precedence
++#define ATI_API_CALL __attribute__((regparm(0)))
++#endif                                      // #if !defined(ATI_API_CALL)
++
++extern void * ATI_API_CALL __ke_memset(void* s, int c, __ke_size_t count);
++extern void * ATI_API_CALL __ke_memcpy(void* d, const void* s, __ke_size_t count);
++extern ATI_API_CALL __ke_size_t __ke_strlen(const char *s);     // attribute precedes the return type here, unlike its neighbours
++extern char* ATI_API_CALL __ke_strcpy(char* d, const char* s);
++extern char* ATI_API_CALL __ke_strncpy(char* d, const char* s, __ke_size_t count);
++extern void __ke_printk(const char* fmt, ...);                  // NOTE(review): the only declaration here without ATI_API_CALL - confirm it matches the original
++
++extern int ATI_API_CALL __ke_snprintf(char* buf, __ke_size_t size, const char* fmt, ...);
++extern int ATI_API_CALL KCL_CopyFromUserSpace(void* to, const void* from, __ke_size_t size);
++extern int ATI_API_CALL KCL_CopyToUserSpace(void* to, const void* from, __ke_size_t size);
++#define __ke_copy_from_user  KCL_CopyFromUserSpace              // __ke_* spelling is an alias of the KCL_* function above
++#define __ke_copy_to_user    KCL_CopyToUserSpace                // __ke_* spelling is an alias of the KCL_* function above
++extern int ATI_API_CALL __ke_verify_area(int type, const void * addr, unsigned long size);
++
++extern unsigned long ATI_API_CALL KAS_GetTickCounter(void);
++extern unsigned long ATI_API_CALL KAS_GetTicksPerSecond(void);
++
++
++#if DBG                                     // va_list formatting is only declared for debug builds
++extern int ATI_API_CALL __ke_vsnprintf(char *buf, __ke_size_t size, const char *fmt, va_list ap);
++#define vsnprintf(_dst, _size, _fmt, varg)  __ke_vsnprintf(_dst, _size, _fmt, varg)  // a va_list must go to the va_list variant, not the variadic __ke_snprintf
++#endif                                      // #if DBG
++
++
++// Note: This function is not defined in firegl_public.h.
++void    firegl_hardwareHangRecovery(void);
++
++#ifdef __cplusplus
++}
++#endif
++
++//
++// --------------------------  C/C++ standard typedefs ----------------------------
++//
++#ifdef __SIZE_TYPE__                        // use the compiler's own type when it predefines one
++typedef __SIZE_TYPE__       size_t;
++#else                                       // #ifdef __SIZE_TYPE__
++typedef unsigned int        size_t;         // fallback; NOTE(review): may be narrower than a pointer on 64-bit targets - confirm
++#endif                                      // #ifdef __SIZE_TYPE__
++
++#ifdef __PTRDIFF_TYPE__                     // use the compiler's own type when it predefines one
++typedef __PTRDIFF_TYPE__    ptrdiff_t;
++#else                                       // #ifdef __PTRDIFF_TYPE__
++typedef int                 ptrdiff_t;      // fallback; NOTE(review): may be narrower than a pointer on 64-bit targets - confirm
++#endif                                      // #ifdef __PTRDIFF_TYPE__
++
++#ifndef NULL                                // keep any NULL that is already defined
++#ifdef __cplusplus
++#define NULL    __null
++#else
++#define NULL    ((void *)0)
++#endif
++#endif
++
++
++//
++// -------------------------  C/C++ standard macros ---------------------------
++//
++
++#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)  // as it is defined in stddef.h
++#define CHAR_BIT            8                                   // as it is defined in limits.h
++
++//
++// ---------------------------------  C RTL -----------------------------------
++//
++
++#define memset(_p, _v, _n)                  __ke_memset(_p, _v, _n)     // standard names are redirected to the __ke_* externs
++#define memcpy(_d, _s, _n)                  __ke_memcpy(_d, _s, _n)
++#define strlen(_s)                          __ke_strlen(_s)
++#define strcpy(_d, _s)                      __ke_strcpy(_d, _s)
++#define strncpy(_d, _s, _n)                 __ke_strncpy(_d, _s, _n)
++// Note: C99 supports macros with a variable number of arguments. GCC also supports this C99 feature as
++//       a C++ extension. The named form "arg..." with "##arg" used below is GCC syntax.
++#define snprintf(_dst, _size, _fmt, arg...) __ke_snprintf(_dst, _size, _fmt, ##arg)
++
++
++#endif                                      // #ifndef _lnx_common_defs_h_
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/chip/si_ci_vi_merged_enum.h b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/chip/si_ci_vi_merged_enum.h
+new file mode 100644
+index 0000000..5ed81ad
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/chip/si_ci_vi_merged_enum.h
+@@ -0,0 +1,40 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++#if !defined (SI_CI_VI_MERGED_ENUM_HEADER)
++#define SI_CI_VI_MERGED_ENUM_HEADER
++
++typedef enum PipeInterleaveSize {   // NOTE(review): presumably the encodings of a pipe-interleave-size register field - confirm
++ADDR_CONFIG_PIPE_INTERLEAVE_256B         = 0x00000000,
++ADDR_CONFIG_PIPE_INTERLEAVE_512B         = 0x00000001,
++} PipeInterleaveSize;
++
++typedef enum RowSize {              // NOTE(review): presumably the encodings of a row-size register field - confirm
++ADDR_CONFIG_1KB_ROW                      = 0x00000000,
++ADDR_CONFIG_2KB_ROW                      = 0x00000001,
++ADDR_CONFIG_4KB_ROW                      = 0x00000002,
++} RowSize;
++
++#endif
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.cpp
+new file mode 100644
+index 0000000..264e2ef
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.cpp
+@@ -0,0 +1,1777 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  ciaddrlib.cpp
++* @brief Contains the implementation for the CIAddrLib class.
++***************************************************************************************************
++*/
++
++#include "ciaddrlib.h"
++
++#include "si_gb_reg.h"
++
++#include "si_ci_vi_merged_enum.h"
++
++#if BRAHMA_BUILD
++#include "amdgpu_id.h"
++#else
++#include "ci_id.h"
++#include "kv_id.h"
++#include "vi_id.h"
++#endif
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrMask
++*
++*   @brief
++*       Gets a mask with the low "width" bits set (all bits when width is 64 or more)
++*   @return
++*       Bit mask
++***************************************************************************************************
++*/
++static UINT_64 AddrMask(
++    UINT_32 width)  ///< Number of low-order bits to set
++{
++    // Start from all ones; that is already the answer when width spans the whole word.
++    UINT_64 mask = ~((UINT_64) 0);
++
++    if (width < sizeof(UINT_64)*8)
++    {
++        // Only narrower widths are shifted; a shift by the full word width is
++        // never evaluated.
++        mask = (((UINT_64) 1) << width) - 1;
++    }
++
++    return mask;
++}
++
++/**
++***************************************************************************************************
++*   AddrGetBits
++*
++*   @brief
++*       Gets bits within a range of [msb, lsb]
++*   @return
++*       Bits of this range
++***************************************************************************************************
++*/
++static UINT_64 AddrGetBits(
++    UINT_64 bits,   ///< Source bits
++    UINT_32 msb,    ///< Most significant bit
++    UINT_32 lsb)    ///< Least significant bit
++{
++    UINT_64 ret = 0;    // stays 0 when the range is inverted (msb < lsb)
++
++    if (msb >= lsb)
++    {
++        ret = (bits >> lsb) & (AddrMask(1 + msb - lsb));    // move the range down to bit 0, keep (msb - lsb + 1) bits
++    }
++    return ret;
++}
++
++/**
++***************************************************************************************************
++*   AddrRemoveBits
++*
++*   @brief
++*       Removes bits within the range of [msb, lsb]
++*   @return
++*       Modified bits
++***************************************************************************************************
++*/
++static UINT_64 AddrRemoveBits(
++    UINT_64 bits,   ///< Source bits
++    UINT_32 msb,    ///< Most significant bit
++    UINT_32 lsb)    ///< Least significant bit
++{
++    UINT_64 ret = bits;    // returned unchanged when the range is inverted (msb < lsb)
++
++    if (msb >= lsb)
++    {
++        ret = AddrGetBits(bits, lsb - 1, 0) // low bits stay in place (lsb == 0 yields 0, assuming UINT_32 is unsigned 32-bit so the width wraps to 0)
++            | (AddrGetBits(bits, 8 * sizeof(bits) - 1, msb + 1) << lsb); // high bits above msb slide down to lsb
++    }
++    return ret;
++}
++
++/**
++***************************************************************************************************
++*   AddrInsertBits
++*
++*   @brief
++*       Inserts new bits into the range of [msb, lsb]
++*   @return
++*       Modified bits
++***************************************************************************************************
++*/
++static UINT_64 AddrInsertBits(
++    UINT_64 bits,       ///< Source bits
++    UINT_64 newBits,    ///< New bits to be inserted
++    UINT_32 msb,        ///< Most significant bit
++    UINT_32 lsb)        ///< Least significant bit
++{
++    UINT_64 ret = bits;    // returned unchanged when the range is inverted (msb < lsb)
++
++    if (msb >= lsb)
++    {
++        ret = AddrGetBits(bits, lsb - 1, 0) // old low bits stay in place
++             | (AddrGetBits(newBits, msb - lsb, 0) << lsb) // low (msb - lsb + 1) bits of newBits land at lsb
++             | (AddrGetBits(bits, 8 * sizeof(bits) - 1, lsb) << (msb + 1)); // old bits from lsb upward move above msb
++    }
++    return ret;
++}
++
++
++/**
++***************************************************************************************************
++*   AddrCIHwlInit
++*
++*   @brief
++*       Creates a CIAddrLib object.
++*
++*   @return
++*       Returns a CIAddrLib object pointer.
++***************************************************************************************************
++*/
++AddrLib* AddrCIHwlInit(const AddrClient* pClient)
++{
++    return CIAddrLib::CreateObj(pClient);   // forwards pClient to CIAddrLib::CreateObj and returns its result as the base pointer
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::CIAddrLib
++*
++*   @brief
++*       Constructor
++*
++***************************************************************************************************
++*/
++CIAddrLib::CIAddrLib(const AddrClient* pClient) :
++    SIAddrLib(pClient),                 // the base class receives the client unchanged
++    m_noOfMacroEntries(0),              // macro entry count starts at zero
++    m_allowNonDispThickModes(FALSE)
++{
++    m_class = CI_ADDRLIB;
++    memset(&m_settings, 0, sizeof(m_settings));   // every m_settings member starts out zeroed
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::~CIAddrLib
++*
++*   @brief
++*       Destructor
++***************************************************************************************************
++*/
++CIAddrLib::~CIAddrLib()
++{   // empty body: this class performs no cleanup of its own
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeDccInfo
++*
++*   @brief
++*       Compute DCC key size, base alignment
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE CIAddrLib::HwlComputeDccInfo(
++    const ADDR_COMPUTE_DCCINFO_INPUT*  pIn,         // [in] surface size, bpp, sample count, tile mode/info
++    ADDR_COMPUTE_DCCINFO_OUTPUT*       pOut) const  // [out] DCC ram size, base alignment, fast clear size
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode))
++    {
++        UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8;    // 1/256 of the color surface size
++
++        ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff));         // expects a multiple of 256 so the shift loses nothing
++
++        if (pIn->numSamples > 1)
++        {
++            UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight);
++            UINT_32 samplesPerSplit  = pIn->tileInfo.tileSplitBytes / tileSizePerSample;
++
++            if (samplesPerSplit < pIn->numSamples)
++            {
++                UINT_32 numSplits = pIn->numSamples / samplesPerSplit;  // NOTE(review): divides by zero when tileSplitBytes < tileSizePerSample - confirm callers exclude that
++                UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes;
++
++                ADDR_ASSERT(IsPow2(fastClearBaseAlign));       // the mask test below relies on a power of two
++
++                dccFastClearSize /= numSplits;
++
++                if (0 != (dccFastClearSize & (fastClearBaseAlign - 1)))
++                {
++                    // Disable dcc fast clear
++                    // if key size of first sample split is not pipe*interleave aligned
++                    dccFastClearSize = 0;
++                }
++            }
++        }
++
++        pOut->dccRamSize          = pIn->colorSurfSize >> 8;
++        pOut->dccRamBaseAlign     = pIn->tileInfo.banks *
++                                    HwlGetPipes(&pIn->tileInfo) *
++                                    m_pipeInterleaveBytes;
++        pOut->dccFastClearSize    = dccFastClearSize;
++
++        ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign));            // the mask test below relies on a power of two
++
++        if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1)))
++        {
++            pOut->subLvlCompressible = TRUE;                   // size is already a multiple of the base alignment
++        }
++        else
++        {
++            UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes;
++
++            if (pOut->dccRamSize == pOut->dccFastClearSize)    // fast clear size was left untouched above: pad it like the ram size
++            {
++                pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign);
++            }
++            pOut->dccRamSize          = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign);
++            pOut->subLvlCompressible  = FALSE;
++        }
++    }
++    else
++    {
++        returnCode = ADDR_NOTSUPPORTED;                        // pOut is not written on this path
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeCmaskAddrFromCoord
++*
++*   @brief
++*       Compute tc compatible Cmask address from fmask ram address
++*
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE CIAddrLib::HwlComputeCmaskAddrFromCoord(
++    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*  pIn,  ///< [in] fmask addr/bpp/tile input
++    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*       pOut  ///< [out] cmask address
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED;    // result unless both conditions below hold; pOut is then left untouched
++
++    if ((m_settings.isVolcanicIslands == TRUE) &&
++        (pIn->flags.tcCompatible == TRUE))
++    {
++        UINT_32 numOfPipes   = HwlGetPipes(pIn->pTileInfo);
++        UINT_32 numOfBanks   = pIn->pTileInfo->banks;
++        UINT_64 fmaskAddress = pIn->fmaskAddr;
++        UINT_32 elemBits     = pIn->bpp;
++        UINT_32 blockByte    = 64 * elemBits / 8;        // bytes taken by 64 elements of elemBits bits each
++        UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress,
++                                                                    0,
++                                                                    0,
++                                                                    4,
++                                                                    elemBits,
++                                                                    blockByte,
++                                                                    m_pipeInterleaveBytes,
++                                                                    numOfPipes,
++                                                                    numOfBanks,
++                                                                    1);
++        pOut->addr = (metaNibbleAddress >> 1);                   // half the nibble address (two nibbles per byte)
++        pOut->bitPosition = (metaNibbleAddress % 2) ? 4 : 0;     // odd nibble address -> bit 4, even -> bit 0
++        returnCode = ADDR_OK;
++    }
++
++    return returnCode;
++}
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlConvertChipFamily
++*
++*   @brief
++*       Convert familyID defined in atiid.h to AddrChipFamily and set m_chipFamily/m_chipRevision
++*   @return
++*       AddrChipFamily
++***************************************************************************************************
++*/
++AddrChipFamily CIAddrLib::HwlConvertChipFamily(
++    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
++    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
++{
++    AddrChipFamily family = ADDR_CHIP_FAMILY_CI;    // returned as-is: no case below reassigns it
++
++    switch (uChipFamily)        // each case only raises flags in m_settings
++    {
++        case FAMILY_CI:
++            m_settings.isSeaIsland  = 1;
++            m_settings.isBonaire    = ASICREV_IS_BONAIRE_M(uChipRevision);
++            m_settings.isHawaii     = ASICREV_IS_HAWAII_P(uChipRevision);
++            break;
++        case FAMILY_KV:
++            m_settings.isKaveri     = 1;
++            m_settings.isSpectre    = ASICREV_IS_SPECTRE(uChipRevision);
++            m_settings.isSpooky     = ASICREV_IS_SPOOKY(uChipRevision);
++            m_settings.isKalindi    = ASICREV_IS_KALINDI(uChipRevision);
++            break;
++        case FAMILY_VI:
++            m_settings.isVolcanicIslands = 1;
++            m_settings.isIceland         = ASICREV_IS_ICELAND_M(uChipRevision);
++            m_settings.isTonga           = ASICREV_IS_TONGA_P(uChipRevision);
++            break;
++        case FAMILY_CZ:
++            m_settings.isCarrizo         = 1;
++            m_settings.isVolcanicIslands = 1;       // FAMILY_CZ also takes the Volcanic Islands code paths
++            break;
++        default:
++            ADDR_ASSERT(!"This should be a unexpected Fusion");
++            break;              // unknown family: no flag is set, CI is still returned
++    }
++
++    return family;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlInitGlobalParams
++*
++*   @brief
++*       Initializes global parameters
++*
++*   @return
++*       TRUE if all settings are valid
++*
++***************************************************************************************************
++*/
++BOOL_32 CIAddrLib::HwlInitGlobalParams(
++    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
++{
++    BOOL_32  valid = TRUE;
++
++    const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue;
++
++    valid = DecodeGbRegs(pRegValue);    // overwrites the initial TRUE
++
++    // The following assignments for m_pipes are only a fail-safe (made even when valid is
++    // FALSE); InitTileSettingTable should read the correct pipes from the tile mode table
++    if (m_settings.isHawaii)
++    {
++        // Hawaii has 16-pipe, see GFXIP_Config_Summary.xls
++        m_pipes = 16;
++    }
++    else if (m_settings.isBonaire || m_settings.isSpectre)
++    {
++        m_pipes = 4;
++    }
++    else // Treat everything else (e.g. other KV asics) as 2-pipe
++    {
++        m_pipes = 2;
++    }
++
++    // @todo: VI
++    // Move this to VI code path once created; for now it overrides the value chosen above
++    if (m_settings.isTonga)
++    {
++        m_pipes = 8;
++    }
++    else if (m_settings.isIceland)
++    {
++        m_pipes = 2;
++    }
++
++    if (valid)      // each table is initialized only if every earlier step succeeded
++    {
++        valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries);
++    }
++    if (valid)
++    {
++        valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries);
++    }
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlPostCheckTileIndex
++*
++*   @brief
++*       Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches
++*       tile mode/type/info and change the index if needed
++*   @return
++*       Tile index.
++***************************************************************************************************
++*/
++INT_32 CIAddrLib::HwlPostCheckTileIndex(
++    const ADDR_TILEINFO* pInfo,     ///< [in] Tile Info
++    AddrTileMode         mode,      ///< [in] Tile mode
++    AddrTileType         type,      ///< [in] Tile type
++    INT                  curIndex   ///< [in] Current index assigned in HwlSetupTileInfo
++    ) const
++{
++    INT_32 index = curIndex;
++
++    if (mode == ADDR_TM_LINEAR_GENERAL)
++    {
++        index = TileIndexLinearGeneral;     // fixed index for this mode; the table is not consulted
++    }
++    else
++    {
++        BOOL_32 macroTiled = IsMacroTiled(mode);
++
++        // A new index has to be searched for if any of the following is true:
++        // 1. curIndex is invalid
++        // 2. tile mode is changed
++        // 3. tile info does not match for macro tiled
++        if ((index == TileIndexInvalid)         ||
++            (mode != m_tileTable[index].mode)   ||   // NOTE(review): only TileIndexInvalid is screened before this lookup - confirm curIndex is otherwise in range
++            (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig))
++        {
++            for (index = 0; index < static_cast<INT_32>(m_noOfEntries); index++)   // linear scan; stops at the first matching entry
++            {
++                if (macroTiled)
++                {
++                    // macro tile modes need all to match
++                    if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) &&
++                        (mode == m_tileTable[index].mode) &&
++                        (type == m_tileTable[index].type))
++                    {
++                        // tileSplitBytes stored in m_tileTable is only valid for depth entries
++                        if (type == ADDR_DEPTH_SAMPLE_ORDER)
++                        {
++                            if (pInfo->tileSplitBytes == m_tileTable[index].info.tileSplitBytes)
++                            {
++                                break;
++                            }
++                        }
++                        else // other entries are determined by other 3 fields
++                        {
++                            break;
++                        }
++                    }
++                }
++                else if (mode == ADDR_TM_LINEAR_ALIGNED)
++                {
++                    // linear mode only needs tile mode to match
++                    if (mode == m_tileTable[index].mode)
++                    {
++                        break;
++                    }
++                }
++                else
++                {
++                    // micro tile modes only need tile mode and tile type to match
++                    if (mode == m_tileTable[index].mode &&
++                        type == m_tileTable[index].type)
++                    {
++                        break;
++                    }
++                }
++            }
++        }
++    }
++
++    ADDR_ASSERT(index < static_cast<INT_32>(m_noOfEntries));
++
++    if (index >= static_cast<INT_32>(m_noOfEntries))    // the scan ran off the end without a match
++    {
++        index = TileIndexInvalid;
++    }
++
++    return index;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlSetupTileCfg
++*
++*   @brief
++*       Map tile index to tile setting.
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE CIAddrLib::HwlSetupTileCfg(
++    INT_32          index,          ///< [in] Tile index
++    INT_32          macroModeIndex, ///< [in] Index in macro tile mode table(CI)
++    ADDR_TILEINFO*  pInfo,          ///< [out] Tile Info
++    AddrTileMode*   pMode,          ///< [out] Tile mode
++    AddrTileType*   pType           ///< [out] Tile type
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;     // also the result when UseTileIndex(index) is false and nothing is written
++
++    // Global flag to control usage of tileIndex
++    if (UseTileIndex(index))
++    {
++        if (static_cast<UINT_32>(index) >= m_noOfEntries)   // the unsigned cast makes negative indices fail this check too
++        {
++            returnCode = ADDR_INVALIDPARAMS;
++        }
++        else
++        {
++            const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index);
++
++            if (pInfo != NULL)      // each output pointer is optional and skipped when NULL
++            {
++                if (IsMacroTiled(pCfgTable->mode))
++                {
++                    ADDR_ASSERT(((macroModeIndex != TileIndexInvalid)
++                        && (macroModeIndex != TileIndexNoMacroIndex)));
++                    // tile_bytes is used here in place of tile_split, based on:
++                    //   tile_split_c = MIN(ROW_SIZE, tile_split)
++                    //   tile_bytes   = MIN(tile_split_c, num_samples * tile_bytes_1x)
++                    // Substituting tile_bytes for tile_split leaves alignment and
++                    // the other results (such as slicesPerTile) unaffected: if
++                    // tile_split_c is the larger value no split happens, and otherwise
++                    // (num_samples * tile_bytes_1x is the larger value) a correct
++                    // tile_split is returned.
++                    // NOTE(review): macroModeIndex is only checked by the assert above.
++                    *pInfo = m_macroTileTable[macroModeIndex];
++
++                    if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER)
++                    {
++                        pInfo->tileSplitBytes = pCfgTable->info.tileSplitBytes;
++                    }
++                    pInfo->pipeConfig = pCfgTable->info.pipeConfig;     // pipe config always comes from the tile table entry
++                }
++                else // 1D and linear modes, we return default value stored in table
++                {
++                    *pInfo = pCfgTable->info;
++                }
++            }
++
++            if (pMode != NULL)
++            {
++                *pMode = pCfgTable->mode;
++            }
++
++            if (pType != NULL)
++            {
++                *pType = pCfgTable->type;
++            }
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeSurfaceInfo
++*
++*   @brief
++*       Entry of ci's ComputeSurfaceInfo
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE CIAddrLib::HwlComputeSurfaceInfo(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    // If tileIndex is invalid, force macroModeIndex to be invalid, too
++    if (pIn->tileIndex == TileIndexInvalid)
++    {
++        pOut->macroModeIndex = TileIndexInvalid;
++    }
++
++    ADDR_E_RETURNCODE retCode = SIAddrLib::HwlComputeSurfaceInfo(pIn,pOut);    // the base class does the actual computation
++
++    if (pOut->macroModeIndex == TileIndexNoMacroIndex)      // report "no macro index" to the caller as invalid
++    {
++        pOut->macroModeIndex = TileIndexInvalid;
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeFmaskInfo
++*   @brief
++*       Entry of r800's ComputeFmaskInfo
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE CIAddrLib::HwlComputeFmaskInfo(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut   ///< [out] output structure
++    )
++{
++    ADDR_E_RETURNCODE retCode = ADDR_OK;
++
++    ADDR_TILEINFO tileInfo = {0};
++    ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn;
++    fmaskIn = *pIn;     // private copy, so tileIndex/pTileInfo can be overridden without touching pIn
++
++    AddrTileMode tileMode = pIn->tileMode;
++
++    // Use internal tile info if pOut does not have a valid pTileInfo
++    if (pOut->pTileInfo == NULL)
++    {
++        pOut->pTileInfo = &tileInfo;
++    }
++
++    ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1     ||
++                tileMode == ADDR_TM_3D_TILED_THIN1     ||
++                tileMode == ADDR_TM_PRT_TILED_THIN1    ||
++                tileMode == ADDR_TM_PRT_2D_TILED_THIN1 ||
++                tileMode == ADDR_TM_PRT_3D_TILED_THIN1);
++
++    ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1);   // the hard-coded indices below depend on these two entries
++    ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1);
++
++    // The only valid tile modes for fmask are 2D_THIN1 and 3D_THIN1 plus non-displayable
++    INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 14 : 15;   // every mode other than 2D_THIN1 takes entry 15
++    ADDR_SURFACE_FLAGS flags = {{0}};
++    flags.fmask = 1;
++
++    INT_32 macroModeIndex = TileIndexInvalid;
++
++    UINT_32 numSamples = pIn->numSamples;
++    UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags;   // numFrags of 0 means "same as numSamples"
++
++    UINT_32 bpp = QLog2(numFrags);
++
++    // EQAA needs one more bit
++    if (numSamples > numFrags)
++    {
++        bpp++;
++    }
++
++    if (bpp == 3)       // a 3-bit value is widened to 4 bits
++    {
++        bpp = 4;
++    }
++
++    bpp = Max(8u, bpp * numSamples);    // bits for all samples together, never below 8
++
++    macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo);
++
++    fmaskIn.tileIndex = tileIndex;
++    fmaskIn.pTileInfo = pOut->pTileInfo;
++    pOut->macroModeIndex = macroModeIndex;
++    pOut->tileIndex = tileIndex;
++
++    retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut);
++
++    if (retCode == ADDR_OK)
++    {
++        pOut->tileIndex =               // re-validate the index against the caller's original tile mode
++            HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE,
++                                  pOut->tileIndex);
++    }
++
++    // Resets pTileInfo to NULL if the internal tile info is used
++    if (pOut->pTileInfo == &tileInfo)   // the local must not be referenced after this function returns
++    {
++        pOut->pTileInfo = NULL;
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlFmaskPreThunkSurfInfo
++*
++*   @brief
++*       Some preparation before thunking a ComputeSurfaceInfo call for Fmask
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID CIAddrLib::HwlFmaskPreThunkSurfInfo(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pFmaskIn,   ///< [in] Input of fmask info
++    const ADDR_COMPUTE_FMASK_INFO_OUTPUT*   pFmaskOut,  ///< [in] Output of fmask info
++    ADDR_COMPUTE_SURFACE_INFO_INPUT*        pSurfIn,    ///< [out] Input of thunked surface info
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pSurfOut    ///< [out] Output of thunked surface info
++    ) const
++{
++    pSurfIn->tileIndex = pFmaskIn->tileIndex;                   // forward the fmask tile index
++    pSurfOut->macroModeIndex  = pFmaskOut->macroModeIndex;      // forward the macro mode index
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlFmaskPostThunkSurfInfo
++*
++*   @brief
++*       Copy hwl extra fields (tileIndex, macroModeIndex) after calling thunked ComputeSurfaceInfo
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID CIAddrLib::HwlFmaskPostThunkSurfInfo(
++    const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,   ///< [in] Output of surface info
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut           ///< [out] Output of fmask info
++    ) const
++{
++    pFmaskOut->tileIndex = pSurfOut->tileIndex;
++    pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlDegradeThickTileMode
++*
++*   @brief
++*       Thick tile mode degrade hook; this implementation performs no degradation
++*
++*   @return
++*       baseTileMode, unchanged
++***************************************************************************************************
++*/
++AddrTileMode CIAddrLib::HwlDegradeThickTileMode(
++    AddrTileMode        baseTileMode,   ///< [in] base tile mode
++    UINT_32             numSlices,      ///< [in] current number of slices (not used here)
++    UINT_32*            pBytesPerTile   ///< [in/out] pointer to bytes per tile (not touched here)
++    ) const
++{
++    return baseTileMode;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlOverrideTileMode
++*
++*   @brief
++*       Override unsupported PRT modes and, for specific formats on CI, THICK to THIN
++*
++*   @return
++*       TRUE if *pTileMode was changed, FALSE otherwise
++*
++***************************************************************************************************
++*/
++BOOL_32 CIAddrLib::HwlOverrideTileMode(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,       ///< [in] input structure
++    AddrTileMode*                           pTileMode, ///< [in/out] pointer to the tile mode
++    AddrTileType*                           pTileType  ///< [in/out] pointer to the tile type
++    ) const
++{
++    BOOL_32 bOverrided = FALSE;
++    AddrTileMode tileMode = *pTileMode;
++
++    // currently, all CI/VI family do not
++    // support ADDR_TM_PRT_2D_TILED_THICK, ADDR_TM_PRT_3D_TILED_THICK and
++    // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1
++    switch (tileMode)
++    {
++        case ADDR_TM_PRT_2D_TILED_THICK:
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            tileMode = ADDR_TM_PRT_TILED_THICK;
++            break;
++        case ADDR_TM_PRT_2D_TILED_THIN1:
++        case ADDR_TM_PRT_3D_TILED_THIN1:
++            tileMode = ADDR_TM_PRT_TILED_THIN1;
++            break;
++        default:
++            break;
++    }
++
++    // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table
++    if (!m_settings.isBonaire)
++    {
++        UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++        // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1)
++        if (thickness > 1)
++        {
++            switch (pIn->format)
++            {
++                // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp
++                // tcpError("Thick micro tiling is not supported for format...
++                case ADDR_FMT_X24_8_32_FLOAT:
++                case ADDR_FMT_32_AS_8:
++                case ADDR_FMT_32_AS_8_8:
++                case ADDR_FMT_32_AS_32_32_32_32:
++
++                // packed formats
++                case ADDR_FMT_GB_GR:
++                case ADDR_FMT_BG_RG:
++                case ADDR_FMT_1_REVERSED:
++                case ADDR_FMT_1:
++                case ADDR_FMT_BC1:
++                case ADDR_FMT_BC2:
++                case ADDR_FMT_BC3:
++                case ADDR_FMT_BC4:
++                case ADDR_FMT_BC5:
++                case ADDR_FMT_BC6:
++                case ADDR_FMT_BC7:
++                    switch (tileMode)
++                    {
++                        case ADDR_TM_1D_TILED_THICK:
++                            tileMode    = ADDR_TM_1D_TILED_THIN1;
++                            break;
++
++                        case ADDR_TM_2D_TILED_XTHICK:
++                        case ADDR_TM_2D_TILED_THICK:
++                            tileMode    = ADDR_TM_2D_TILED_THIN1;
++                            break;
++
++                        case ADDR_TM_3D_TILED_XTHICK:
++                        case ADDR_TM_3D_TILED_THICK:
++                            tileMode    = ADDR_TM_3D_TILED_THIN1;
++                            break;
++
++                        case ADDR_TM_PRT_TILED_THICK:
++                            tileMode    = ADDR_TM_PRT_TILED_THIN1;
++                            break;
++
++                        case ADDR_TM_PRT_2D_TILED_THICK: // not reached: folded into PRT_TILED_THICK above
++                            tileMode    = ADDR_TM_PRT_2D_TILED_THIN1;
++                            break;
++
++                        case ADDR_TM_PRT_3D_TILED_THICK: // not reached: folded into PRT_TILED_THICK above
++                            tileMode    = ADDR_TM_PRT_3D_TILED_THIN1;
++                            break;
++
++                        default:
++                            break;
++
++                    }
++
++                    // Switch tile type from thick to thin
++                    if (tileMode != *pTileMode)
++                    {
++                        // see tileIndex: 13-18
++                        *pTileType = ADDR_NON_DISPLAYABLE;
++                    }
++
++                    break;
++                default:
++                    break;
++            }
++        }
++    }
++
++    if (tileMode != *pTileMode) // publish the new mode only if something above changed it
++    {
++        *pTileMode = tileMode;
++        bOverrided = TRUE;
++    }
++
++    return bOverrided;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlSetupTileInfo
++*
++*   @brief
++*       Setup default value of tile info for CI; fills pOut->tileIndex/tileType/macroModeIndex
++***************************************************************************************************
++*/
++VOID CIAddrLib::HwlSetupTileInfo(
++    AddrTileMode                        tileMode,       ///< [in] Tile mode
++    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
++    UINT_32                             bpp,            ///< [in] Bits per pixel
++    UINT_32                             pitch,          ///< [in] Pitch in pixels
++    UINT_32                             height,         ///< [in] Height in pixels
++    UINT_32                             numSamples,     ///< [in] Number of samples
++    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input: NULL for default
++    ADDR_TILEINFO*                      pTileInfoOut,   ///< [out] Tile info output
++    AddrTileType                        inTileType,     ///< [in] Tile type
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [out] Output
++    ) const
++{
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++    ADDR_TILEINFO* pTileInfo = pTileInfoOut; // only the output tile info is inspected and written
++    INT index = TileIndexInvalid;
++    INT macroModeIndex = TileIndexInvalid;
++
++    // Fail-safe code
++    if (!IsLinear(tileMode))
++    {
++        // Thick tile modes must use thick micro tile mode but Bonaire does not support due to
++        // old derived netlists (UBTS 404321)
++        if (thickness > 1)
++        {
++            if (m_settings.isBonaire)
++            {
++                inTileType = ADDR_NON_DISPLAYABLE;
++            }
++            else if ((m_allowNonDispThickModes == FALSE) || (inTileType != ADDR_NON_DISPLAYABLE))
++            {
++                inTileType = ADDR_THICK;
++            }
++        }
++        // 128 bpp tiling must be non-displayable.
++        // Fmask reuse color buffer's entry but bank-height field can be from another entry
++        // To simplify the logic, fmask entry should be picked from non-displayable ones
++        else if (bpp == 128 || flags.fmask)
++        {
++            inTileType = ADDR_NON_DISPLAYABLE;
++        }
++        // These two modes only have non-disp entries though they can be other micro tile modes
++        else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1)
++        {
++            inTileType = ADDR_NON_DISPLAYABLE;
++        }
++
++        if (flags.depth || flags.stencil) // takes precedence over any tile type chosen above
++        {
++            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
++        }
++    }
++
++    // The lookups below run in sequence; a later match overwrites an index chosen earlier
++    if (IsTileInfoAllZero(pTileInfo))
++    {
++        // See table entries 0-4
++        if (flags.depth || flags.stencil)
++        {
++            if (flags.depth && flags.tcCompatible)
++            {
++                // tileSize = bpp * numSamples * 8 * 8 / 8
++                UINT_32 tileSize = bpp * numSamples * 8;
++
++                // Texture readable depth surface should not be split
++                switch (tileSize)
++                {
++                    case 128:
++                        index = 1;
++                        break;
++                    case 256:
++                        index = 2;
++                        break;
++                    case 512:
++                        index = 3;
++                        break;
++                    default:
++                        index = 4;
++                        break;
++                }
++            }
++            else
++            {
++                // Depth and stencil need to use the same index, thus the pre-defined tile_split
++                // can meet the requirement to choose the same macro mode index
++                // uncompressed depth/stencil are not supported for now
++                switch (numSamples)
++                {
++                    case 1:
++                        index = 0;
++                        break;
++                    case 2:
++                    case 4:
++                        index = 1;
++                        break;
++                    case 8:
++                        index = 2;
++                        break;
++                    default:
++                        break;
++                }
++            }
++        }
++
++        // See table entries 5-6
++        if (inTileType == ADDR_DEPTH_SAMPLE_ORDER)
++        {
++            switch (tileMode)
++            {
++                case ADDR_TM_1D_TILED_THIN1:
++                    index = 5;
++                    break;
++                case ADDR_TM_PRT_TILED_THIN1:
++                    index = 6;
++                    break;
++                default:
++                    break;
++            }
++        }
++
++        // See table entries 8-12
++        if (inTileType == ADDR_DISPLAYABLE)
++        {
++            switch (tileMode)
++            {
++                case ADDR_TM_1D_TILED_THIN1:
++                    index = 9;
++                    break;
++                case ADDR_TM_2D_TILED_THIN1:
++                    index = 10;
++                    break;
++                case ADDR_TM_PRT_TILED_THIN1:
++                    index = 11;
++                    break;
++                default:
++                    break;
++            }
++        }
++
++        // See table entries 13-18
++        if (inTileType == ADDR_NON_DISPLAYABLE)
++        {
++            switch (tileMode)
++            {
++                case ADDR_TM_1D_TILED_THIN1:
++                    index = 13;
++                    break;
++                case ADDR_TM_2D_TILED_THIN1:
++                    index = 14;
++                    break;
++                case ADDR_TM_3D_TILED_THIN1:
++                    index = 15;
++                    break;
++                case ADDR_TM_PRT_TILED_THIN1:
++                    index = 16;
++                    break;
++                default:
++                    break;
++            }
++        }
++
++        // See table entries 19-26
++        if (thickness > 1)
++        {
++            switch (tileMode)
++            {
++            case ADDR_TM_1D_TILED_THICK:
++                    // special check for bonaire, for the compatibility between old KMD and new UMD for bonaire
++                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 19 : 18;
++                    break;
++            case ADDR_TM_2D_TILED_THICK:
++                    // special check for bonaire, for the compatibility between old KMD and new UMD for bonaire
++                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24;
++                    break;
++                case ADDR_TM_3D_TILED_THICK:
++                    index = 21;
++                    break;
++                case ADDR_TM_PRT_TILED_THICK:
++                    index = 22;
++                    break;
++                case ADDR_TM_2D_TILED_XTHICK:
++                    index = 25;
++                    break;
++                case ADDR_TM_3D_TILED_XTHICK:
++                    index = 26;
++                    break;
++                default:
++                    break;
++            }
++        }
++
++        // See table entries 27-30
++        if (inTileType == ADDR_ROTATED)
++        {
++            switch (tileMode)
++            {
++                case ADDR_TM_1D_TILED_THIN1:
++                    index = 27;
++                    break;
++                case ADDR_TM_2D_TILED_THIN1:
++                    index = 28;
++                    break;
++                case ADDR_TM_PRT_TILED_THIN1:
++                    index = 29;
++                    break;
++                case ADDR_TM_PRT_2D_TILED_THIN1:
++                    index = 30;
++                    break;
++                default:
++                    break;
++            }
++        }
++
++        if (m_pipes >= 8)
++        {
++            ADDR_ASSERT((index + 1) < static_cast<INT_32>(m_noOfEntries));
++            // Only do this when tile mode table is updated.
++            if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) &&
++                (m_tileTable[index+1].mode == tileMode))
++            {
++                UINT_32 bytesXSamples = bpp * numSamples / 8;
++                UINT_32 bytesXThickness = bpp * thickness / 8;
++                UINT_32 switchP4Threshold = (m_pipes == 16) ? 8 : 32;
++
++                if ((bytesXSamples > switchP4Threshold) || (bytesXThickness > switchP4Threshold))
++                {
++                    // Pick next 4 pipe entry
++                    index += 1;
++                }
++            }
++        }
++    }
++    else
++    {
++        // A pre-filled tile info is ready
++        index = pOut->tileIndex;
++        macroModeIndex = pOut->macroModeIndex;
++
++        // pass tile type back for post tile index compute
++        pOut->tileType = inTileType;
++    }
++
++    // We only need to set up tile info if there is a valid index but macroModeIndex is invalid
++    if (index != TileIndexInvalid && macroModeIndex == TileIndexInvalid)
++    {
++        macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo);
++
++        /// Copy to pOut->tileType/tileIndex/macroModeIndex
++        pOut->tileIndex = index;
++        pOut->tileType = m_tileTable[index].type; // Or inTileType, the same
++        pOut->macroModeIndex = macroModeIndex;
++    }
++    else if (tileMode == ADDR_TM_LINEAR_GENERAL)
++    {
++        pOut->tileIndex = TileIndexLinearGeneral;
++
++        // Copy linear-aligned entry??
++        *pTileInfo = m_tileTable[8].info;
++    }
++    else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
++    {
++        pOut->tileIndex = 8;
++        *pTileInfo = m_tileTable[8].info;
++    }
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::ReadGbTileMode
++*
++*   @brief
++*       Convert GB_TILE_MODE HW value to ADDR_TILECONFIG.
++*   @return
++*       NA.
++***************************************************************************************************
++*/
++VOID CIAddrLib::ReadGbTileMode(
++    UINT_32             regValue,   ///< [in] GB_TILE_MODE register
++    ADDR_TILECONFIG*    pCfg        ///< [out] output structure
++    ) const
++{
++    GB_TILE_MODE gbTileMode;
++    gbTileMode.val = regValue;
++
++    pCfg->type = static_cast<AddrTileType>(gbTileMode.f.micro_tile_mode_new);
++    pCfg->info.pipeConfig = static_cast<AddrPipeCfg>(gbTileMode.f.pipe_config + 1); // field + 1
++
++    if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER)
++    {
++        pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split;
++    }
++    else
++    {
++        pCfg->info.tileSplitBytes = 1 << gbTileMode.f.sample_split; // a split factor, not bytes
++    }
++
++    UINT_32 regArrayMode = gbTileMode.f.array_mode;
++
++    pCfg->mode = static_cast<AddrTileMode>(regArrayMode); // default; remapped below for some values
++
++    switch (regArrayMode)
++    {
++        case 5:
++            pCfg->mode = ADDR_TM_PRT_TILED_THIN1;
++            break;
++        case 6:
++            pCfg->mode = ADDR_TM_PRT_2D_TILED_THIN1;
++            break;
++        case 8:
++            pCfg->mode = ADDR_TM_2D_TILED_XTHICK;
++            break;
++        case 9:
++            pCfg->mode = ADDR_TM_PRT_TILED_THICK;
++            break;
++        case 0xa:
++            pCfg->mode = ADDR_TM_PRT_2D_TILED_THICK;
++            break;
++        case 0xb:
++            pCfg->mode = ADDR_TM_PRT_3D_TILED_THIN1;
++            break;
++        case 0xe:
++            pCfg->mode = ADDR_TM_3D_TILED_XTHICK;
++            break;
++        case 0xf:
++            pCfg->mode = ADDR_TM_PRT_3D_TILED_THICK;
++            break;
++        default:
++            break;
++    }
++
++    // Fail-safe code for these always convert tile info, as the non-macro modes
++    // return the entry of tile mode table directly without looking up macro mode table
++    if (!IsMacroTiled(pCfg->mode))
++    {
++        pCfg->info.banks = 2;
++        pCfg->info.bankWidth = 1;
++        pCfg->info.bankHeight = 1;
++        pCfg->info.macroAspectRatio = 1;
++        pCfg->info.tileSplitBytes = 64; // replaces the value decoded from the register above
++    }
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::InitTileSettingTable
++*
++*   @brief
++*       Initialize the ADDR_TILECONFIG table (m_tileTable) from the client's GB_TILE_MODE values.
++*   @return
++*       TRUE if tile table is correctly initialized; FALSE if pCfg is NULL or noOfEntries too large
++***************************************************************************************************
++*/
++BOOL_32 CIAddrLib::InitTileSettingTable(
++    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
++    UINT_32         noOfEntries     ///< [in] Number of entries in the table above; 0 = TileTableSize
++    )
++{
++    BOOL_32 initOk = TRUE;
++
++    ADDR_ASSERT(noOfEntries <= TileTableSize);
++
++    memset(m_tileTable, 0, sizeof(m_tileTable));
++
++    if ((noOfEntries != 0) && (noOfEntries <= TileTableSize))
++    {
++        m_noOfEntries = noOfEntries;
++    }
++    else
++    {
++        m_noOfEntries = TileTableSize; // default; also keeps the count bounded on oversized input
++    }
++
++    if (pCfg && (noOfEntries <= TileTableSize)) // From Client; runtime bound if assert is a no-op
++    {
++        for (UINT_32 i = 0; i < m_noOfEntries; i++)
++        {
++            ReadGbTileMode(*(pCfg + i), &m_tileTable[i]);
++        }
++    }
++    else
++    {
++        ADDR_ASSERT_ALWAYS();
++        initOk = FALSE;
++    }
++
++    if (initOk)
++    {
++        ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED);
++
++        if (m_settings.isBonaire == FALSE)
++        {
++            // Check if entry 18 is "thick+thin" combination
++            if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) &&
++                (m_tileTable[18].type == ADDR_NON_DISPLAYABLE))
++            {
++                m_allowNonDispThickModes = TRUE;
++                ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK);
++            }
++        }
++        else
++        {
++            m_allowNonDispThickModes = TRUE;
++        }
++
++        // Assume the first entry is always programmed with full pipes
++        m_pipes = HwlGetPipes(&m_tileTable[0].info);
++    }
++
++    return initOk;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::ReadGbMacroTileCfg
++*
++*   @brief
++*       Convert GB_MACRO_TILE_CFG HW value to ADDR_TILEINFO (bank and macro aspect fields only).
++*   @return
++*       NA.
++***************************************************************************************************
++*/
++VOID CIAddrLib::ReadGbMacroTileCfg(
++    UINT_32             regValue,   ///< [in] GB_MACRO_TILE_MODE register
++    ADDR_TILEINFO*      pCfg        ///< [out] output structure
++    ) const
++{
++    GB_MACROTILE_MODE gbTileMode;
++    gbTileMode.val = regValue;
++
++    // Every field is decoded as a power of two; num_banks carries an extra +1 in the exponent
++    pCfg->bankHeight = 1 << gbTileMode.f.bank_height;
++    pCfg->bankWidth = 1 << gbTileMode.f.bank_width;
++    pCfg->banks = 1 << (gbTileMode.f.num_banks + 1);
++    pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::InitMacroTileCfgTable
++*
++*   @brief
++*       Initialize the macro tile table (m_macroTileTable) from the client's register values.
++*   @return
++*       TRUE if macro tile table is correctly initialized; FALSE if pCfg is NULL or count too large
++***************************************************************************************************
++*/
++BOOL_32 CIAddrLib::InitMacroTileCfgTable(
++    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
++    UINT_32         noOfMacroEntries     ///< [in] Number of entries above; 0 = MacroTileTableSize
++    )
++{
++    BOOL_32 initOk = TRUE;
++
++    ADDR_ASSERT(noOfMacroEntries <= MacroTileTableSize);
++
++    memset(m_macroTileTable, 0, sizeof(m_macroTileTable));
++
++    if ((noOfMacroEntries != 0) && (noOfMacroEntries <= MacroTileTableSize))
++    {
++        m_noOfMacroEntries = noOfMacroEntries;
++    }
++    else
++    {
++        m_noOfMacroEntries = MacroTileTableSize; // default; also bounds the count on bad input
++    }
++
++    if (pCfg && (noOfMacroEntries <= MacroTileTableSize)) // From Client; runtime bound check
++    {
++        for (UINT_32 i = 0; i < m_noOfMacroEntries; i++)
++        {
++            ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]);
++
++            m_macroTileTable[i].tileSplitBytes = 64 << (i % 8); // derived from the entry position
++        }
++    }
++    else
++    {
++        ADDR_ASSERT_ALWAYS();
++        initOk = FALSE;
++    }
++    return initOk;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeMacroModeIndex
++*
++*   @brief
++*       Computes macro tile mode index
++*   @return
++*       Macro tile mode index, or TileIndexNoMacroIndex when no macro mode applies
++***************************************************************************************************
++*/
++INT_32 CIAddrLib::HwlComputeMacroModeIndex(
++    INT_32              tileIndex,      ///< [in] Tile mode index
++    ADDR_SURFACE_FLAGS  flags,          ///< [in] Surface flags
++    UINT_32             bpp,            ///< [in] Bit per pixel
++    UINT_32             numSamples,     ///< [in] Number of samples
++    ADDR_TILEINFO*      pTileInfo,      ///< [out] Pointer to ADDR_TILEINFO
++    AddrTileMode*       pTileMode,      ///< [out] Pointer to AddrTileMode, may be NULL
++    AddrTileType*       pTileType       ///< [out] Pointer to AddrTileType, may be NULL
++    ) const
++{
++    INT_32 macroModeIndex = TileIndexInvalid;
++
++    if (flags.tcCompatible && flags.stencil)
++    {
++        // Don't compute macroModeIndex for tc compatible stencil surface
++        macroModeIndex = TileIndexNoMacroIndex; // no output pointer is written on this path
++    }
++    else
++    {
++        AddrTileMode tileMode = m_tileTable[tileIndex].mode;
++        AddrTileType tileType = m_tileTable[tileIndex].type;
++        UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++        if (!IsMacroTiled(tileMode))
++        {
++            *pTileInfo = m_tileTable[tileIndex].info; // tile table entry is used as-is
++            macroModeIndex = TileIndexNoMacroIndex;
++        }
++        else
++        {
++            UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness);
++            UINT_32 tileSplit;
++
++            if (m_tileTable[tileIndex].type == ADDR_DEPTH_SAMPLE_ORDER)
++            {
++                // Depth entries store real tileSplitBytes
++                tileSplit = m_tileTable[tileIndex].info.tileSplitBytes;
++            }
++            else
++            {
++                // Non-depth entries store a split factor
++                UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes;
++                UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x);
++
++                tileSplit = colorTileSplit;
++            }
++
++            UINT_32 tileSplitC = Min(m_rowSize, tileSplit); // tile split capped at m_rowSize
++            UINT_32 tileBytes;
++
++            if (flags.fmask)
++            {
++                tileBytes = Min(tileSplitC, tileBytes1x); // fmask: numSamples is not multiplied in
++            }
++            else
++            {
++                tileBytes = Min(tileSplitC, numSamples * tileBytes1x);
++            }
++
++            if (tileBytes < 64)
++            {
++                tileBytes = 64;
++            }
++
++            macroModeIndex = Log2(tileBytes / 64); // index by log2 of tile size in 64-byte units
++
++            if (flags.prt || IsPrtTileMode(tileMode))
++            {
++                // Unknown - assume it is 1/2 of table size
++                const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2;
++
++                macroModeIndex += PrtMacroModeOffset;
++                *pTileInfo = m_macroTileTable[macroModeIndex];
++            }
++            else
++            {
++                *pTileInfo = m_macroTileTable[macroModeIndex];
++            }
++
++            pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig;
++
++            if (m_tileTable[tileIndex].type != ADDR_DEPTH_SAMPLE_ORDER)
++            {
++                pTileInfo->tileSplitBytes = tileSplitC;
++            }
++            else
++            {
++                pTileInfo->tileSplitBytes = m_tileTable[tileIndex].info.tileSplitBytes;
++            }
++        }
++
++        if (NULL != pTileMode)
++        {
++            *pTileMode = tileMode;
++        }
++
++        if (NULL != pTileType)
++        {
++            *pTileType = tileType;
++        }
++    }
++
++    return macroModeIndex;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeTileDataWidthAndHeightLinear
++*
++*   @brief
++*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
++*
++*   @return
++*       N/A
++*
++*   @note
++*       MacroWidth and macroHeight are measured in pixels
++***************************************************************************************************
++*/
++VOID CIAddrLib::HwlComputeTileDataWidthAndHeightLinear(
++    UINT_32*        pMacroWidth,     ///< [out] macro tile width
++    UINT_32*        pMacroHeight,    ///< [out] macro tile height
++    UINT_32         bpp,             ///< [in] bits per pixel (not used here)
++    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info; only pipeConfig is read
++    ) const
++{
++    ADDR_ASSERT(pTileInfo != NULL);
++
++    UINT_32 numTiles; // micro tiles per side of the square macro block
++
++    switch (pTileInfo->pipeConfig)
++    {
++        case ADDR_PIPECFG_P16_32x32_8x16:
++        case ADDR_PIPECFG_P16_32x32_16x16:
++        case ADDR_PIPECFG_P8_32x64_32x32:
++        case ADDR_PIPECFG_P8_32x32_16x32:
++        case ADDR_PIPECFG_P8_32x32_16x16:
++        case ADDR_PIPECFG_P8_32x32_8x16:
++        case ADDR_PIPECFG_P4_32x32:
++            numTiles = 8;
++            break;
++        default:
++            numTiles = 4;
++            break;
++    }
++
++    *pMacroWidth    = numTiles * MicroTileWidth;
++    *pMacroHeight   = numTiles * MicroTileHeight;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlStereoCheckRightOffsetPadding
++*
++*   @brief
++*       check if the height needs extra padding for stereo right eye offset, to avoid swizzling
++*
++*   @return
++*       TRUE if the extra padding is needed
++*
++*   @note
++*       Kalindi (Kabini) is the only one that needs this padding as there is an uncertain
++*       possible HW issue where the right eye displays incorrectly with some type of swizzles, if
++*       the right eye offset is not 64KB aligned - EPR#366461
++*       Other Kaveri APUs also need the padding according to DXX team's report otherwise
++*       corruption observed. - EPR#374788
++***************************************************************************************************
++*/
++BOOL_32 CIAddrLib::HwlStereoCheckRightOffsetPadding() const
++{
++    BOOL_32 bNeedPadding = FALSE;
++
++    if (m_settings.isKaveri) // the only setting consulted by this check
++    {
++        bNeedPadding = TRUE;
++    }
++
++    return bNeedPadding;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlComputeMetadataNibbleAddress
++*
++*   @brief
++*        calculate meta data address based on input information
++*
++*   &parameter
++*        uncompressedDataByteAddress - address of a pixel in color surface
++*        dataBaseByteAddress         - base address of color surface
++*        metadataBaseByteAddress     - base address of meta ram
++*        metadataBitSize             - meta key size, 8 for DCC, 4 for cmask
++*        elementBitSize              - element size of color surface
++*        blockByteSize               - compression block size, 256 for DCC
++*        pipeInterleaveBytes         - pipe interleave size
++*        numOfPipes                  - number of pipes
++*        numOfBanks                  - number of banks
++*        numOfSamplesPerSplit        - number of samples per tile split
++*   @return
++*        meta data nibble address (nibble address is used to support DCC compatible cmask)
++*
++***************************************************************************************************
++*/
++UINT_64 CIAddrLib::HwlComputeMetadataNibbleAddress(
++    UINT_64 uncompressedDataByteAddress,
++    UINT_64 dataBaseByteAddress,
++    UINT_64 metadataBaseByteAddress,
++    UINT_32 metadataBitSize,
++    UINT_32 elementBitSize,
++    UINT_32 blockByteSize,
++    UINT_32 pipeInterleaveBytes,
++    UINT_32 numOfPipes,
++    UINT_32 numOfBanks,
++    UINT_32 numOfSamplesPerSplit) const
++{
++    ///--------------------------------------------------------------------------------------------
++    /// Get pipe interleave, bank and pipe bits
++    ///--------------------------------------------------------------------------------------------
++    UINT_32 pipeInterleaveBits  = Log2(pipeInterleaveBytes);
++    UINT_32 pipeBits            = Log2(numOfPipes);
++    UINT_32 bankBits            = Log2(numOfBanks);
++
++    ///--------------------------------------------------------------------------------------------
++    /// Clear pipe and bank swizzles
++    ///--------------------------------------------------------------------------------------------
++    UINT_32 dataMacrotileBits        = pipeInterleaveBits + pipeBits + bankBits;
++    UINT_32 metadataMacrotileBits    = pipeInterleaveBits + pipeBits + bankBits;
++    // Build the masks from a 64-bit one: `1L` is only 32 bits wide on ILP32/LLP64 targets.
++    UINT_64 dataMacrotileClearMask     = ~((static_cast<UINT_64>(1) << dataMacrotileBits) - 1);
++    UINT_64 metadataMacrotileClearMask = ~((static_cast<UINT_64>(1) << metadataMacrotileBits) - 1);
++
++    UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask;
++    UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask;
++
++    ///--------------------------------------------------------------------------------------------
++    /// Modify metadata base before adding in so that when final address is divided by data ratio,
++    /// the base address returns to where it should be
++    ///--------------------------------------------------------------------------------------------
++    ADDR_ASSERT((0 != metadataBitSize));
++    UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 /
++                                  metadataBitSize;
++    UINT_64 offset = uncompressedDataByteAddress -
++                     dataBaseByteAddressNoSwizzle +
++                     metadataBaseShifted;
++
++    ///--------------------------------------------------------------------------------------------
++    /// Save bank data bits
++    ///--------------------------------------------------------------------------------------------
++    UINT_32 lsb = pipeBits + pipeInterleaveBits;
++    UINT_32 msb = bankBits - 1 + lsb;
++
++    UINT_64 bankDataBits = AddrGetBits(offset, msb, lsb);
++
++    ///--------------------------------------------------------------------------------------------
++    /// Save pipe data bits
++    ///--------------------------------------------------------------------------------------------
++    lsb = pipeInterleaveBits;
++    msb = pipeBits - 1 + lsb;
++
++    UINT_64 pipeDataBits = AddrGetBits(offset, msb, lsb);
++
++    ///--------------------------------------------------------------------------------------------
++    /// Remove pipe and bank bits
++    ///--------------------------------------------------------------------------------------------
++    lsb = pipeInterleaveBits;
++    msb = dataMacrotileBits - 1;
++
++    UINT_64 offsetWithoutPipeBankBits = AddrRemoveBits(offset, msb, lsb);
++
++    ADDR_ASSERT((0 != blockByteSize));
++    UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize;
++    // Bytes of 8 * 8 elements across the samples of one split, and the blocks that fit in it.
++    UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit;
++    UINT_32 blocksInTile = tileSize / blockByteSize;
++    // Log2 is skipped when no whole block fits (blocksInTile == 0); lsb is 0 in that case.
++    if (0 == blocksInTile)
++    {
++        lsb = 0;
++    }
++    else
++    {
++        lsb = Log2(blocksInTile);
++    }
++    msb = bankBits - 1 + lsb;
++
++    UINT_64 blockInBankpipeWithBankBits = AddrInsertBits(blockInBankpipe, bankDataBits, msb, lsb);
++
++    /// NOTE *2 because we are converting to Nibble address in this step
++    UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 8;
++
++
++    ///--------------------------------------------------------------------------------------------
++    /// Reinsert pipe bits back into the final address
++    ///--------------------------------------------------------------------------------------------
++    lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb.
++    msb = pipeBits - 1 + lsb;
++    UINT_64 metadataAddress = AddrInsertBits(metaAddressInPipe, pipeDataBits, msb, lsb);
++
++    return metadataAddress;
++}
++
++/**
++***************************************************************************************************
++*   CIAddrLib::HwlPadDimensions
++*
++*   @brief
++*       Helper function to pad dimensions
++*
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID CIAddrLib::HwlPadDimensions(
++    AddrTileMode        tileMode,    ///< [in] tile mode
++    UINT_32             bpp,         ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,       ///< [in] surface flags
++    UINT_32             numSamples,  ///< [in] number of samples
++    ADDR_TILEINFO*      pTileInfo,   ///< [in/out] bank structure.
++    UINT_32             padDims,     ///< [in] Dimensions to pad valid value 1,2,3 (not read here)
++    UINT_32             mipLevel,    ///< [in] MipLevel
++    UINT_32*            pPitch,      ///< [in/out] pitch in pixels
++    UINT_32             pitchAlign,  ///< [in] pitch alignment
++    UINT_32*            pHeight,     ///< [in/out] height in pixels (only read here)
++    UINT_32             heightAlign, ///< [in] height alignment
++    UINT_32*            pSlices,     ///< [in/out] number of slices (not read here)
++    UINT_32             sliceAlign   ///< [in] number of slice alignment (not read here)
++    ) const
++{
++    // Pitch padding applies only to VI, DCC-compatible, MSAA, mip level 0, macro-tiled surfaces.
++    if (!m_settings.isVolcanicIslands ||
++        !flags.dccCompatible ||
++        (numSamples <= 1) ||
++        (mipLevel != 0) ||
++        !IsMacroTiled(tileMode))
++    {
++        return;
++    }
++
++    const UINT_32 bytesPerTileSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight);
++    const UINT_32 splitSamples       = pTileInfo->tileSplitBytes / bytesPerTileSample;
++
++    // Continue only when one tile split holds fewer samples than the surface has.
++    if (splitSamples >= numSamples)
++    {
++        return;
++    }
++
++    const UINT_32 dccByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256;
++    const UINT_32 splitBytes   = BITS_TO_BYTES((*pPitch) * (*pHeight) * bpp * splitSamples);
++
++    ADDR_ASSERT(IsPow2(dccByteAlign));
++
++    // Already a multiple of the DCC fast-clear byte alignment: leave the pitch alone.
++    if ((splitBytes & (dccByteAlign - 1)) == 0)
++    {
++        return;
++    }
++
++    const UINT_32 dccPixelAlign   = dccByteAlign / BITS_TO_BYTES(bpp) / splitSamples;
++    const UINT_32 macroTilePixels = pitchAlign * heightAlign;
++
++    // The pixel alignment is only usable as a non-zero whole number of macro tiles.
++    if ((dccPixelAlign < macroTilePixels) || ((dccPixelAlign % macroTilePixels) != 0))
++    {
++        return;
++    }
++
++    UINT_32 alignInMacroTiles  = dccPixelAlign / macroTilePixels;
++    UINT_32 heightInMacroTiles = *pHeight / heightAlign;
++
++    // Halve both counts in lock-step while each is even and still greater than one.
++    while ((heightInMacroTiles > 1) && ((heightInMacroTiles & 1) == 0) &&
++           (alignInMacroTiles > 1) && ((alignInMacroTiles & 1) == 0))
++    {
++        heightInMacroTiles /= 2;
++        alignInMacroTiles  /= 2;
++    }
++
++    // Round the pitch up to a multiple of the resulting alignment, in pixels.
++    const UINT_32 pitchAlignPixels = pitchAlign * alignInMacroTiles;
++
++    *pPitch = IsPow2(pitchAlignPixels) ?
++              PowTwoAlign((*pPitch), pitchAlignPixels) :
++              ((*pPitch + (pitchAlignPixels - 1)) / pitchAlignPixels) * pitchAlignPixels;
++}
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.h b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.h
+new file mode 100644
+index 0000000..0220736
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/ciaddrlib.h
+@@ -0,0 +1,197 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  ciaddrlib.h
++* @brief Contains the CIAddrLib class definition.
++***************************************************************************************************
++*/
++
++#ifndef __CI_ADDR_LIB_H__
++#define __CI_ADDR_LIB_H__
++
++#include "addrlib.h"
++#include "siaddrlib.h"
++
++/**
++***************************************************************************************************
++* @brief CI specific settings structure.
++***************************************************************************************************
++*/
++struct CIChipSettings
++{
++    struct // anonymous: flags are accessed directly on the enclosing object
++    {
++        UINT_32 isSeaIsland : 1;
++        UINT_32 isBonaire   : 1;
++        UINT_32 isKaveri    : 1;
++        UINT_32 isSpectre   : 1;
++        UINT_32 isSpooky    : 1;
++        UINT_32 isKalindi   : 1;
++        // Hawaii is GFXIP 7.2, similar to CI (Bonaire)
++        UINT_32 isHawaii    : 1;
++
++        // VI (Volcanic Islands)
++        UINT_32 isVolcanicIslands : 1;
++        UINT_32 isIceland         : 1;
++        UINT_32 isTonga           : 1;
++        // VI fusion (Carrizo)
++        UINT_32 isCarrizo         : 1;
++    };
++};
++
++/**
++***************************************************************************************************
++* @brief This class is the CI specific address library
++*        function set.
++***************************************************************************************************
++*/
++class CIAddrLib : public SIAddrLib
++{
++public:
++    /// Creates a CIAddrLib object; pClient is handed to both operator new and the constructor
++    static AddrLib* CreateObj(const AddrClient* pClient)
++    {
++        return new(pClient) CIAddrLib(pClient);
++    }
++
++private: // constructor and destructor are private; CreateObj() above does the construction
++    CIAddrLib(const AddrClient* pClient);
++    virtual ~CIAddrLib();
++
++protected:
++
++    // Hwl interface - defined in AddrLib
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
++
++    virtual AddrChipFamily HwlConvertChipFamily(
++        UINT_32 uChipFamily, UINT_32 uChipRevision);
++
++    virtual BOOL_32 HwlInitGlobalParams(
++        const ADDR_CREATE_INPUT* pCreateIn);
++
++    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
++        INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo,
++        AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
++
++    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
++        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
++        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
++
++    virtual INT_32 HwlComputeMacroModeIndex(
++        INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
++        ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL
++        ) const;
++
++    // Sub-hwl interface - defined in EgBasedAddrLib
++    virtual VOID HwlSetupTileInfo(
++        AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
++        AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    virtual INT_32 HwlPostCheckTileIndex(
++        const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
++        INT curIndex = TileIndexInvalid) const;
++
++    virtual VOID   HwlFmaskPreThunkSurfInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
++        const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
++
++    virtual VOID   HwlFmaskPostThunkSurfInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
++
++    virtual AddrTileMode HwlDegradeThickTileMode(
++        AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
++
++    virtual BOOL_32 HwlOverrideTileMode(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        AddrTileMode* pTileMode,
++        AddrTileType* pTileType) const;
++
++    virtual BOOL_32 HwlStereoCheckRightOffsetPadding() const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
++        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
++        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
++        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++protected: // repeats the access specifier already in effect
++    virtual VOID HwlPadDimensions(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
++        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
++        UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
++        UINT_32* pSlices, UINT_32 sliceAlign) const;
++
++private: // helpers and data members used only by this class
++    VOID ReadGbTileMode(
++        UINT_32 regValue, ADDR_TILECONFIG* pCfg) const;
++
++    VOID ReadGbMacroTileCfg(
++        UINT_32 regValue, ADDR_TILEINFO* pCfg) const;
++
++    BOOL_32 InitTileSettingTable(
++        const UINT_32 *pSetting, UINT_32 noOfEntries);
++
++    BOOL_32 InitMacroTileCfgTable(
++        const UINT_32 *pSetting, UINT_32 noOfEntries);
++
++    UINT_64 HwlComputeMetadataNibbleAddress(
++        UINT_64 uncompressedDataByteAddress,
++        UINT_64 dataBaseByteAddress,
++        UINT_64 metadataBaseByteAddress,
++        UINT_32 metadataBitSize,
++        UINT_32 elementBitSize,
++        UINT_32 blockByteSize,
++        UINT_32 pipeInterleaveBytes,
++        UINT_32 numOfPipes,
++        UINT_32 numOfBanks,
++        UINT_32 numOfSamplesPerSplit) const;
++
++    static const UINT_32    MacroTileTableSize = 16;    ///< Number of slots in m_macroTileTable
++    ADDR_TILEINFO           m_macroTileTable[MacroTileTableSize];
++    UINT_32                 m_noOfMacroEntries; ///< presumably valid table entries - TODO confirm
++    BOOL_32                 m_allowNonDispThickModes;
++
++    CIChipSettings          m_settings; ///< Per-chip flag bits (see CIChipSettings)
++};
++
++#endif
++
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.cpp
+new file mode 100644
+index 0000000..2e16cb3
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.cpp
+@@ -0,0 +1,4578 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  egbaddrlib.cpp
++* @brief Contains the EgBasedAddrLib class implementation
++***************************************************************************************************
++*/
++
++#include "egbaddrlib.h"
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::EgBasedAddrLib
++*
++*   @brief
++*       Constructor
++*
++*   @note
++*
++***************************************************************************************************
++*/
++EgBasedAddrLib::EgBasedAddrLib(const AddrClient* pClient)
++    : AddrLib(pClient)     // the base class receives the client unchanged
++    , m_ranks(0)
++    , m_logicalBanks(0)
++    , m_bankInterleave(1)  // the only member given a non-zero starting value here
++{
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::~EgBasedAddrLib
++*
++*   @brief
++*       Destructor
++***************************************************************************************************
++*/
++EgBasedAddrLib::~EgBasedAddrLib()
++{ // Empty body: no explicit cleanup is performed in this destructor.
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::DispatchComputeSurfaceInfo
++*
++*   @brief
++*       Compute surface sizes, including padded pitch, height, slices and total size in bytes.
++*       The output tile mode and base alignment may also be changed to suitable values by this
++*       call. Results are returned through output parameters.
++*
++*   @return
++*       TRUE if no error occurs
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::DispatchComputeSurfaceInfo(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    // Flow: settle the sample count, prepare the working tile info, let the HWL fill in
++    // tiling defaults, then dispatch on the tile mode.
++
++    // Local copies of the inputs that are consulted (or adjusted) below.
++    ADDR_SURFACE_FLAGS  surfFlags  = pIn->flags;
++    const UINT_32       bpp        = pIn->bpp;
++    const UINT_32       mipLevel   = pIn->mipLevel;
++    const UINT_32       numSlices  = pIn->numSlices;
++    UINT_32             numSamples = pIn->numSamples;
++
++    // A fragment count of zero is replaced by the sample count.
++    UINT_32 numFrags = pIn->numFrags;
++    if (numFrags == 0)
++    {
++        numFrags = numSamples;
++    }
++
++    // The requested mode may be replaced before any sizes are computed.
++    const AddrTileMode tileMode = DegradeLargeThickTile(pIn->tileMode, bpp);
++
++    // Only override numSamples for NI above
++    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
++    {
++        // With EQAA (numFrags != numSamples) the real surface size is determined by the
++        // number of fragments; when both already match, this assignment changes nothing.
++        numSamples = numFrags;
++
++        // Save altered numSamples in pOut
++        pOut->numSamples = numSamples;
++    }
++
++    // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo
++    ADDR_ASSERT(pOut->pTileInfo);
++
++    // Should it be NULL nevertheless, a zero-initialised local is used as scratch instead.
++    ADDR_TILEINFO   scratchTileInfo = {0};
++    ADDR_TILEINFO*  pTileInfo       = &scratchTileInfo;
++    if (pOut->pTileInfo != NULL)
++    {
++        pTileInfo = pOut->pTileInfo;
++    }
++
++    // Set default values: copy the caller's tile info when supplied (skipping a self-copy
++    // if input and output share the same structure), otherwise clear the working copy.
++    if (pIn->pTileInfo == NULL)
++    {
++        memset(pTileInfo, 0, sizeof(ADDR_TILEINFO));
++    }
++    else if (pTileInfo != pIn->pTileInfo)
++    {
++        *pTileInfo = *pIn->pTileInfo;
++    }
++
++    // For macro tile mode, we should calculate default tiling parameters
++    HwlSetupTileInfo(tileMode,
++                     surfFlags,
++                     bpp,
++                     pIn->width,
++                     pIn->height,
++                     numSamples,
++                     pIn->pTileInfo,
++                     pTileInfo,
++                     pIn->tileType,
++                     pOut);
++
++    // Cube surfaces request padding of two dimensions, but only at mip level 0.
++    UINT_32 padDims = 0;
++    if (surfFlags.cube && (mipLevel == 0))
++    {
++        padDims = 2;
++    }
++
++    if (surfFlags.cube && (numSlices == 1))
++    {
++        // This is calculating one face, remove cube flag.
++        // NOTE(review): only the local copy of the flags is cleared and it is not read again
++        // in this function; the routines called below are handed pIn, not this copy.
++        surfFlags.cube = 0;
++    }
++
++    // Hand over to the routine that matches the tile mode selected above.
++    switch (tileMode)
++    {
++        case ADDR_TM_LINEAR_GENERAL:
++        case ADDR_TM_LINEAR_ALIGNED:
++            return ComputeSurfaceInfoLinear(pIn, pOut, padDims);
++
++        case ADDR_TM_1D_TILED_THIN1:
++        case ADDR_TM_1D_TILED_THICK:
++            return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode);
++
++        case ADDR_TM_2D_TILED_THIN1:
++        case ADDR_TM_2D_TILED_THICK:
++        case ADDR_TM_3D_TILED_THIN1:
++        case ADDR_TM_3D_TILED_THICK:
++        case ADDR_TM_2D_TILED_XTHICK:
++        case ADDR_TM_3D_TILED_XTHICK:
++        case ADDR_TM_PRT_TILED_THIN1:
++        case ADDR_TM_PRT_2D_TILED_THIN1:
++        case ADDR_TM_PRT_3D_TILED_THIN1:
++        case ADDR_TM_PRT_TILED_THICK:
++        case ADDR_TM_PRT_2D_TILED_THICK:
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode);
++
++        default:
++            break;
++    }
++
++    // No routine handles this tile mode: flag it and report failure.
++    ADDR_ASSERT_ALWAYS();
++    return FALSE;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceInfoLinear
++*
++*   @brief
++*       Compute linear surface sizes, including padded pitch, height, slices and total size in
++*       bytes, as well as the alignments. Since this is linear mode, the output tile mode
++*       is not changed here. Results are returned through output parameters.
++*
++*   @return
++*       TRUE if no error occurs
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoLinear(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] Input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,   ///< [out] Output structure
++    UINT_32                                 padDims ///< [in] Dimensions to pad
++    ) const
++{
++    // Flow: derive the linear alignments, pad the dimensions (with HWL hooks around the
++    // padding step), let the HWL size one slice, then publish everything through pOut.
++
++    // Thickness of one: used for the depth alignment and as the slice padding granularity.
++    const UINT_32 thickness = 1;
++
++    // No linear MSAA on real H/W, keep this for TGL
++    const UINT_32 sampleCount = pOut->numSamples;
++
++    // Working dimensions; they start at the requested size and are padded in place.
++    UINT_32 workPitch  = pIn->width;
++    UINT_32 workHeight = pIn->height;
++    UINT_32 workSlices = pIn->numSlices;
++
++    //
++    // Compute the surface alignments.
++    //
++    ComputeSurfaceAlignmentsLinear(pIn->tileMode, pIn->bpp, pIn->flags,
++                                   &pOut->baseAlign,
++                                   &pOut->pitchAlign,
++                                   &pOut->heightAlign);
++
++#if !ALT_TEST
++    // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch
++    // alignment since PITCH_TILE_MAX is in unit of 8 pixels.
++    // It is OK if it is accessed per line.
++    if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1))
++    {
++        ADDR_ASSERT((pIn->width % 8) == 0);
++    }
++#endif
++
++    pOut->depthAlign = thickness;
++
++    // HWL hook that may adjust the pitch before it is padded.
++    workPitch = HwlPreHandleBaseLvl3xPitch(pIn, workPitch);
++
++    //
++    // Pad pitch and height to the required granularities.
++    //
++    PadDimensions(pIn->tileMode, pIn->bpp, pIn->flags, sampleCount,
++                  pOut->pTileInfo,
++                  padDims,
++                  pIn->mipLevel,
++                  &workPitch,  pOut->pitchAlign,
++                  &workHeight, pOut->heightAlign,
++                  &workSlices, thickness);
++
++    // Counterpart HWL hook, applied to the padded pitch.
++    workPitch = HwlPostHandleBaseLvl3xPitch(pIn, workPitch);
++
++    //
++    // Adjust per HWL; the value returned is used below as the per-slice size.
++    //
++    const UINT_64 sliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode,
++                                                         pIn->bpp,
++                                                         sampleCount,
++                                                         pOut->baseAlign,
++                                                         pOut->pitchAlign,
++                                                         &workPitch,
++                                                         &workHeight,
++                                                         &pOut->heightAlign);
++
++    // Publish the results.
++    pOut->pitch  = workPitch;
++    pOut->height = workHeight;
++    pOut->depth  = workSlices;
++
++    pOut->surfSize = sliceSize * workSlices;
++
++    // Linear mode: the tile mode is passed through unchanged.
++    pOut->tileMode = pIn->tileMode;
++
++    return TRUE;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceInfoMicroTiled
++*
++*   @brief
++*       Compute 1D/Micro Tiled surface sizes, including padded pitch, height, slices and total
++*       size in bytes, as well as the alignments. Results are returned through output
++*       parameters.
++*
++*   @return
++*       TRUE if no error occurs
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMicroTiled(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,       ///< [out] Output structure
++    UINT_32                                 padDims,    ///< [in] Dimensions to pad
++    AddrTileMode                            expTileMode ///< [in] Expected tile mode
++    ) const
++{
++    // Flow: pick the micro tile thickness (with a possible thick->thin override for mip
++    // levels), derive the alignments, pad the dimensions and let the HWL size one slice.
++
++    // Working dimensions, padded in place further down.
++    UINT_32 workPitch  = pIn->width;
++    UINT_32 workHeight = pIn->height;
++    UINT_32 workSlices = pIn->numSlices;
++
++    // No 1D MSAA on real H/W, keep this for TGL
++    // Note the count is read from pOut, not pIn.
++    const UINT_32 sampleCount = pOut->numSamples;
++
++    //
++    // Compute the micro tile thickness.
++    //
++    UINT_32 thickness = ComputeSurfaceThickness(expTileMode);
++
++    //
++    // Extra override for mip levels:
++    // reduce tiling mode from thick to thin if the number of slices is less than the
++    // micro tile thickness.
++    //
++    if ((pIn->mipLevel > 0) &&
++        (expTileMode == ADDR_TM_1D_TILED_THICK) &&
++        (workSlices < ThickTileThickness))
++    {
++        expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, workSlices, NULL);
++
++        // The HWL hook returns the mode to use; only a mode that actually changed resets
++        // the thickness to one.
++        if (expTileMode != ADDR_TM_1D_TILED_THICK)
++        {
++            thickness = 1;
++        }
++    }
++
++    //
++    // Compute the surface restrictions.
++    //
++    ComputeSurfaceAlignmentsMicroTiled(expTileMode,
++                                       pIn->bpp,
++                                       pIn->flags,
++                                       sampleCount,
++                                       &pOut->baseAlign,
++                                       &pOut->pitchAlign,
++                                       &pOut->heightAlign);
++
++    // The depth alignment is the thickness settled on above.
++    pOut->depthAlign = thickness;
++
++    //
++    // Pad pitch and height to the required granularities.
++    //
++    PadDimensions(expTileMode, pIn->bpp, pIn->flags, sampleCount,
++                  pOut->pTileInfo,
++                  padDims,
++                  pIn->mipLevel,
++                  &workPitch,  pOut->pitchAlign,
++                  &workHeight, pOut->heightAlign,
++                  &workSlices, thickness);
++
++    //
++    // Get HWL specific pitch adjustment; the value returned is used as the per-slice size.
++    //
++    const UINT_64 sliceSize = HwlGetSizeAdjustmentMicroTiled(thickness,
++                                                             pIn->bpp,
++                                                             pIn->flags,
++                                                             sampleCount,
++                                                             pOut->baseAlign,
++                                                             pOut->pitchAlign,
++                                                             &workPitch,
++                                                             &workHeight);
++
++    //
++    // Return parameters.
++    //
++    pOut->pitch  = workPitch;
++    pOut->height = workHeight;
++    pOut->depth  = workSlices;
++
++    pOut->surfSize = sliceSize * workSlices;
++
++    pOut->tileMode = expTileMode;
++
++    // Nothing in this routine signals failure, so TRUE is always returned.
++    return TRUE;
++}
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceInfoMacroTiled
++*
++*   @brief
++*       Compute 2D/macro tiled surface sizes, including padded pitch, height, slices and total
++*       size in bytes. The output tile mode and alignments may also be changed to suitable
++*       values in this call. Results are returned through output parameters.
++*
++*   @return
++*       TRUE if no error occurs
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,       ///< [out] Output structure
++    UINT_32                                 padDims,    ///< [in] Dimensions to padd
++    AddrTileMode                            expTileMode ///< [in] Expected tile mode
++    ) const
++{
++    BOOL_32 valid = TRUE;
++
++    AddrTileMode origTileMode = expTileMode;
++    UINT_32 microTileThickness;
++
++    UINT_32 paddedPitch;
++    UINT_32 paddedHeight;
++    UINT_64 bytesPerSlice;
++
++    UINT_32 expPitch     = pIn->width;
++    UINT_32 expHeight    = pIn->height;
++    UINT_32 expNumSlices = pIn->numSlices;
++
++    UINT_32 numSamples = pOut->numSamples;
++
++    //
++    // Compute the surface restrictions as base
++    // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled
++    //
++    valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode,
++                                               pIn->bpp,
++                                               pIn->flags,
++                                               pIn->mipLevel,
++                                               numSamples,
++                                               pOut->pTileInfo,
++                                               &pOut->baseAlign,
++                                               &pOut->pitchAlign,
++                                               &pOut->heightAlign);
++
++    if (valid)
++    {
++        //
++        // Compute the micro tile thickness.
++        //
++        microTileThickness = ComputeSurfaceThickness(expTileMode);
++
++        //
++        // Find the correct tiling mode for mip levels
++        //
++        if (pIn->mipLevel > 0)
++        {
++            //
++            // Try valid tile mode
++            //
++            expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode,
++                                                         pIn->bpp,
++                                                         expPitch,
++                                                         expHeight,
++                                                         expNumSlices,
++                                                         numSamples,
++                                                         pOut->pitchAlign,
++                                                         pOut->heightAlign,
++                                                         pOut->pTileInfo);
++
++            if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled
++            {
++                return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode);
++            }
++            else
++            {
++                if (microTileThickness != ComputeSurfaceThickness(expTileMode))
++                {
++                    //
++                    // Re-compute if thickness changed since bank-height may be changed!
++                    //
++                    return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode);
++                }
++            }
++        }
++
++        paddedPitch     = expPitch;
++        paddedHeight    = expHeight;
++
++        //
++        // Re-cal alignment
++        //
++        if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled
++        {
++            valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode,
++                                                       pIn->bpp,
++                                                       pIn->flags,
++                                                       pIn->mipLevel,
++                                                       numSamples,
++                                                       pOut->pTileInfo,
++                                                       &pOut->baseAlign,
++                                                       &pOut->pitchAlign,
++                                                       &pOut->heightAlign);
++        }
++
++        //
++        // Do padding
++        //
++        PadDimensions(expTileMode,
++                      pIn->bpp,
++                      pIn->flags,
++                      numSamples,
++                      pOut->pTileInfo,
++                      padDims,
++                      pIn->mipLevel,
++                      &paddedPitch, pOut->pitchAlign,
++                      &paddedHeight, pOut->heightAlign,
++                      &expNumSlices, microTileThickness);
++
++        if (pIn->flags.qbStereo &&
++            (pOut->pStereoInfo != NULL) &&
++            HwlStereoCheckRightOffsetPadding())
++        {
++            // Eye height's bank bits are different from y == 0?
++            // Since 3D rendering treats right eye buffer starting from y == "eye height" while
++            // display engine treats it to be 0, so the bank bits may be different, we pad
++            // more in height to make sure y == "eye height" has the same bank bits as y == 0.
++            UINT_32 checkMask = pOut->pTileInfo->banks - 1;
++            UINT_32 bankBits = 0;
++            do
++            {
++                bankBits = (paddedHeight / 8 / pOut->pTileInfo->bankHeight) & checkMask;
++
++                if (bankBits)
++                {
++                   paddedHeight += pOut->heightAlign;
++                }
++            } while (bankBits);
++        }
++
++        //
++        // Compute the size of a slice.
++        //
++        bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) *
++                                      paddedHeight * NextPow2(pIn->bpp) * numSamples);
++
++        pOut->pitch = paddedPitch;
++        // Put this check right here to workaround special mipmap cases which the original height
++        // is needed.
++        // The original height is pre-stored in pOut->height in PostComputeMipLevel and
++        // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too.
++        if (m_configFlags.checkLast2DLevel && numSamples == 1) // Don't check MSAA
++        {
++            // Set a TRUE in pOut if next Level is the first 1D sub level
++            HwlCheckLastMacroTiledLvl(pIn, pOut);
++        }
++        pOut->height = paddedHeight;
++
++        pOut->depth = expNumSlices;
++
++        pOut->surfSize = bytesPerSlice * expNumSlices;
++
++        pOut->tileMode = expTileMode;
++
++        pOut->depthAlign = microTileThickness;
++
++    } // if (valid)
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceAlignmentsLinear
++*
++*   @brief
++*       Compute base, pitch and height alignment for the linear tile modes; results are
++*       returned through the output parameters.
++*
++*   @return
++*       Always TRUE here; unsupported tile modes get alignments of 1 and hit ADDR_UNHANDLED_CASE()
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsLinear(
++    AddrTileMode        tileMode,          ///< [in] tile mode
++    UINT_32             bpp,               ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
++    UINT_32*            pBaseAlign,        ///< [out] base address alignment in bytes
++    UINT_32*            pPitchAlign,       ///< [out] pitch alignment in pixels
++    UINT_32*            pHeightAlign       ///< [out] height alignment in pixels
++    ) const
++{
++    BOOL_32 valid = TRUE;
++
++    switch (tileMode)
++    {
++        case ADDR_TM_LINEAR_GENERAL:
++            //
++            // Base alignment is one element (bpp / 8 bytes, at least 1); pitch and height granularity is 1.
++            //
++            *pBaseAlign   = (bpp > 8) ? bpp / 8 : 1;
++            *pPitchAlign  = 1;
++            *pHeightAlign = 1;
++            break;
++        case ADDR_TM_LINEAR_ALIGNED:
++            //
++            // The required alignment for base is the pipe interleave size.
++            // The required granularity for pitch is hwl dependent.
++            // The required granularity for height is one row.
++            //
++            *pBaseAlign     = m_pipeInterleaveBytes;
++            *pPitchAlign    = HwlGetPitchAlignmentLinear(bpp, flags);
++            *pHeightAlign   = 1;
++            break;
++        default:
++            *pBaseAlign     = 1;
++            *pPitchAlign    = 1;
++            *pHeightAlign   = 1;
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++
++    AdjustPitchAlignment(flags, pPitchAlign);
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled
++*
++*   @brief
++*       Compute 1D tiled surface alignment (base, pitch, height); results are returned through
++*       the output parameters. Carrizo display-compatible surfaces get stricter alignments.
++*
++*   @return
++*       Always TRUE (valid is never cleared in this function)
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled(
++    AddrTileMode        tileMode,          ///< [in] tile mode
++    UINT_32             bpp,               ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
++    UINT_32             numSamples,        ///< [in] number of samples
++    UINT_32*            pBaseAlign,        ///< [out] base address alignment in bytes
++    UINT_32*            pPitchAlign,       ///< [out] pitch alignment in pixels
++    UINT_32*            pHeightAlign       ///< [out] height alignment in pixels
++    ) const
++{
++    BOOL_32 valid = TRUE;
++
++    //
++    // The required alignment for base is the pipe interleave size.
++    //
++    *pBaseAlign   = m_pipeInterleaveBytes;
++
++    *pPitchAlign  = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples);
++
++    *pHeightAlign = MicroTileHeight;
++
++    AdjustPitchAlignment(flags, pPitchAlign);
++
++    // ECR#393489
++    // Workaround 2 for 1D tiling -  There is HW bug for Carrizo
++    // where it requires the following alignments for 1D tiling.
++    if (flags.czDispCompatible)
++    {
++        *pBaseAlign  = PowTwoAlign(*pBaseAlign, 4096);                         //Base address MOD 4096 = 0
++        *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / Max(1u, BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
++    }
++    // end Carrizo workaround for 1D tiling; pitch * bytes-per-pixel must be a multiple of 512, hence 512 / bytes (not a shift)
++
++    return valid;
++}
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlReduceBankWidthHeight
++*
++*   @brief
++*       Enforce tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE by halving bankWidth first and then
++*       bankHeight (never below bankHeightAlign); pTileInfo is updated in place.
++*
++*   @return
++*       FALSE if the product still exceeds m_rowSize afterwards (64-bit depth skips the bankHeight step and returns TRUE)
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::HwlReduceBankWidthHeight(
++    UINT_32             tileSize,           ///< [in] tile size
++    UINT_32             bpp,                ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,              ///< [in] surface flags
++    UINT_32             numSamples,         ///< [in] number of samples
++    UINT_32             bankHeightAlign,    ///< [in] bank height alignment
++    UINT_32             pipes,              ///< [in] pipes
++    ADDR_TILEINFO*      pTileInfo           ///< [in/out] bank structure.
++    ) const
++{
++    UINT_32 macroAspectAlign;
++    BOOL_32 valid = TRUE;
++
++    if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize)
++    {
++        BOOL_32 stillGreater = TRUE;
++
++        // Try reducing bankWidth first
++        if (stillGreater && pTileInfo->bankWidth > 1)
++        {
++            while (stillGreater && pTileInfo->bankWidth > 0)
++            {
++                pTileInfo->bankWidth >>= 1;
++
++                if (pTileInfo->bankWidth == 0)
++                {
++                    pTileInfo->bankWidth = 1;
++                    break;
++                }
++
++                stillGreater =
++                    tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize;
++            }
++
++            // bankWidth is reduced above, so bankHeightAlign and the macro aspect ratio must be recalculated
++            bankHeightAlign = Max(1u,
++                                  m_pipeInterleaveBytes * m_bankInterleave /
++                                  (tileSize * pTileInfo->bankWidth)
++                                  );
++
++            // We cannot increase bankHeight so just assert this case.
++            ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0);
++
++            if (numSamples == 1)
++            {
++                macroAspectAlign = Max(1u,
++                                   m_pipeInterleaveBytes * m_bankInterleave /
++                                   (tileSize * pipes * pTileInfo->bankWidth)
++                                   );
++                pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio,
++                                                          macroAspectAlign);
++            }
++        }
++
++        // Early quit bank_height degradation for "64" bit z buffer
++        if (flags.depth && bpp >= 64)
++        {
++            stillGreater = FALSE;
++        }
++
++        // Then try reducing bankHeight
++        if (stillGreater && pTileInfo->bankHeight > bankHeightAlign)
++        {
++            while (stillGreater && pTileInfo->bankHeight > bankHeightAlign)
++            {
++                pTileInfo->bankHeight >>= 1;
++
++                if (pTileInfo->bankHeight < bankHeightAlign)
++                {
++                    pTileInfo->bankHeight = bankHeightAlign;
++                    break;
++                }
++
++                stillGreater =
++                    tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize;
++            }
++        }
++
++        valid = !stillGreater;
++
++        // Generate a warning if we still fail to meet this constraint
++        if (!valid)
++        {
++            ADDR_WARN(
++                0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)",
++                tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize));
++        }
++    }
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled
++*
++*   @brief
++*       Compute 2D tiled surface alignment; results are returned through the output
++*       parameters. pTileInfo (bankHeight, bankWidth, macroAspectRatio) may be adjusted in place.
++*
++*   @return
++*       TRUE if no error occurs
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled(
++    AddrTileMode        tileMode,           ///< [in] tile mode
++    UINT_32             bpp,                ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,              ///< [in] surface flags
++    UINT_32             mipLevel,           ///< [in] mip level
++    UINT_32             numSamples,         ///< [in] number of samples
++    ADDR_TILEINFO*      pTileInfo,          ///< [in/out] bank structure.
++    UINT_32*            pBaseAlign,         ///< [out] base address alignment in bytes
++    UINT_32*            pPitchAlign,        ///< [out] pitch alignment in pixels
++    UINT_32*            pHeightAlign        ///< [out] height alignment in pixels
++    ) const
++{
++    BOOL_32 valid = SanityCheckMacroTiled(pTileInfo);
++
++    if (valid)
++    {
++        UINT_32 macroTileWidth;
++        UINT_32 macroTileHeight;
++
++        UINT_32 tileSize;
++        UINT_32 bankHeightAlign;
++        UINT_32 macroAspectAlign;
++
++        UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++        UINT_32 pipes = HwlGetPipes(pTileInfo);
++
++        //
++        // Align bank height first according to latest h/w spec
++        //
++
++        // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples)
++        tileSize = Min(pTileInfo->tileSplitBytes,
++                       BITS_TO_BYTES(64 * thickness * bpp * numSamples));
++
++        // bank_height_align =
++        // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width))
++        bankHeightAlign = Max(1u,
++                              m_pipeInterleaveBytes * m_bankInterleave /
++                              (tileSize * pTileInfo->bankWidth)
++                              );
++
++        pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign);
++
++        // num_pipes * bank_width * macro_tile_aspect >=
++        // (pipe_interleave_size * bank_interleave) / tile_size
++        if (numSamples == 1)
++        {
++            // this restriction is only for mipmap (mipmap's numSamples must be 1)
++            macroAspectAlign = Max(1u,
++                               m_pipeInterleaveBytes * m_bankInterleave /
++                               (tileSize * pipes * pTileInfo->bankWidth)
++                               );
++            pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign);
++        }
++
++        valid = HwlReduceBankWidthHeight(tileSize,
++                                      bpp,
++                                      flags,
++                                      numSamples,
++                                      bankHeightAlign,
++                                      pipes,
++                                      pTileInfo);
++
++        // Note: a FALSE result above does not short-circuit; the outputs below are still written.
++        // The required granularity for pitch is the macro tile width.
++        //
++        macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes *
++            pTileInfo->macroAspectRatio;
++
++        *pPitchAlign = macroTileWidth;
++
++        AdjustPitchAlignment(flags, pPitchAlign);
++
++        //
++        // The required granularity for height is the macro tile height.
++        //
++        macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
++            pTileInfo->macroAspectRatio;
++
++        *pHeightAlign = macroTileHeight;
++
++        //
++        // Compute base alignment
++        //
++        *pBaseAlign = pipes *
++            pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize;
++        // SI PRT base level: scale pitch/base alignment by the macro tiles per 0x10000-byte PRT tile.
++        if ((mipLevel == 0) && (flags.prt) && (m_chipFamily == ADDR_CHIP_FAMILY_SI))
++        {
++            static const UINT_32 PrtTileSize = 0x10000;
++
++            UINT_32 macroTileSize = macroTileWidth * macroTileHeight * numSamples * bpp / 8;
++
++            if (macroTileSize < PrtTileSize)
++            {
++                UINT_32 numMacroTiles = PrtTileSize / macroTileSize;
++
++                ADDR_ASSERT((PrtTileSize % macroTileSize) == 0);
++
++                *pPitchAlign *= numMacroTiles;
++                *pBaseAlign  *= numMacroTiles;
++            }
++        }
++    }
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::SanityCheckMacroTiled
++*
++*   @brief
++*       Validate banks, bankWidth, bankHeight, macroAspectRatio and tileSplitBytes of pTileInfo
++*   @return
++*       TRUE if every check, including HwlSanityCheckMacroTiled, passes
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::SanityCheckMacroTiled(
++    ADDR_TILEINFO* pTileInfo   ///< [in] macro-tiled parameters
++    ) const
++{
++    BOOL_32 valid       = TRUE;
++    UINT_32 numPipes    = HwlGetPipes(pTileInfo);
++
++    switch (pTileInfo->banks)
++    {
++        case 2: //fall through
++        case 4: //fall through
++        case 8: //fall through
++        case 16:
++            break;
++        default:
++            valid = FALSE;
++            break;
++
++    }
++
++    if (valid)
++    {
++        switch (pTileInfo->bankWidth)
++        {
++            case 1: //fall through
++            case 2: //fall through
++            case 4: //fall through
++            case 8:
++                break;
++            default:
++                valid = FALSE;
++                break;
++        }
++    }
++
++    if (valid)
++    {
++        switch (pTileInfo->bankHeight)
++        {
++            case 1: //fall through
++            case 2: //fall through
++            case 4: //fall through
++            case 8:
++                break;
++            default:
++                valid = FALSE;
++                break;
++        }
++    }
++
++    if (valid)
++    {
++        switch (pTileInfo->macroAspectRatio)
++        {
++            case 1: //fall through
++            case 2: //fall through
++            case 4: //fall through
++            case 8:
++                break;
++            default:
++                valid = FALSE;
++                break;
++        }
++    }
++
++    if (valid)
++    {
++        if (pTileInfo->banks < pTileInfo->macroAspectRatio)
++        {
++            // This will generate macro tile height <= 1
++            valid = FALSE;
++        }
++    }
++
++    if (valid)
++    {
++        if (pTileInfo->tileSplitBytes > m_rowSize)
++        {
++            valid = FALSE;
++        }
++    }
++
++    if (valid)
++    {
++        valid = HwlSanityCheckMacroTiled(pTileInfo);
++    }
++
++    ADDR_ASSERT(valid == TRUE);
++
++    // Guidance only: this assert does not affect the returned value
++    ADDR_ASSERT(numPipes * pTileInfo->banks >= 4);
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceMipLevelTileMode
++*
++*   @brief
++*       Pick a valid tile mode for a mipmap sub-level: thick modes may be thinned, 2D/3D may drop to 1D
++*
++*   @return
++*       Suitable tile mode
++***************************************************************************************************
++*/
++AddrTileMode EgBasedAddrLib::ComputeSurfaceMipLevelTileMode(
++    AddrTileMode        baseTileMode,   ///< [in] base tile mode
++    UINT_32             bpp,            ///< [in] bits per pixel
++    UINT_32             pitch,          ///< [in] current level pitch
++    UINT_32             height,         ///< [in] current level height
++    UINT_32             numSlices,      ///< [in] current number of slices
++    UINT_32             numSamples,     ///< [in] number of samples
++    UINT_32             pitchAlign,     ///< [in] pitch alignment
++    UINT_32             heightAlign,    ///< [in] height alignment
++    ADDR_TILEINFO*      pTileInfo       ///< [in] ptr to bank structure
++    ) const
++{
++    UINT_64 bytesPerSlice;
++    UINT_32 bytesPerTile;
++
++    AddrTileMode expTileMode = baseTileMode;
++    UINT_32 microTileThickness = ComputeSurfaceThickness(expTileMode);
++    UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave;
++
++    //
++    // Compute the size of a slice and of a micro tile (bytesPerSlice is not read again below).
++    //
++    bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
++    bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples);
++
++    //
++    // Reduce tiling mode from thick to thin if the number of slices is less than the
++    // micro tile thickness.
++    //
++    if (numSlices < microTileThickness)
++    {
++        expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile);
++    }
++
++    if (bytesPerTile > pTileInfo->tileSplitBytes)
++    {
++        bytesPerTile = pTileInfo->tileSplitBytes;
++    }
++
++    UINT_32 threshold1 =
++        bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio;
++
++    UINT_32 threshold2 =
++        bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight;
++
++    //
++    // Reduce the tile mode from 2D/3D to 1D in the following conditions
++    //
++    switch (expTileMode)
++    {
++        case ADDR_TM_2D_TILED_THIN1: //fall through
++        case ADDR_TM_3D_TILED_THIN1:
++        case ADDR_TM_PRT_TILED_THIN1:
++        case ADDR_TM_PRT_2D_TILED_THIN1:
++        case ADDR_TM_PRT_3D_TILED_THIN1:
++            if ((pitch < pitchAlign) ||
++                (height < heightAlign) ||
++                (interleaveSize > threshold1) ||
++                (interleaveSize > threshold2))
++            {
++                expTileMode = ADDR_TM_1D_TILED_THIN1;
++            }
++            break;
++        case ADDR_TM_2D_TILED_THICK: //fall through
++        case ADDR_TM_3D_TILED_THICK:
++        case ADDR_TM_2D_TILED_XTHICK:
++        case ADDR_TM_3D_TILED_XTHICK:
++        case ADDR_TM_PRT_TILED_THICK:
++        case ADDR_TM_PRT_2D_TILED_THICK:
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            if ((pitch < pitchAlign) ||
++                (height < heightAlign))
++            {
++                expTileMode = ADDR_TM_1D_TILED_THICK;
++            }
++            break;
++        default:
++            break;
++    }
++
++    return expTileMode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlDegradeBaseLevel
++*   @brief
++*       Check if degrade is needed for base level, using macro-tiled alignments of a local tile info copy
++*   @return
++*       TRUE if alignments cannot be computed or width/height is below the pitch/height alignment
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const
++{
++    BOOL_32 degrade = FALSE;
++    BOOL_32 valid = TRUE;
++
++    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
++
++    UINT_32 baseAlign;
++    UINT_32 pitchAlign;
++    UINT_32 heightAlign;
++
++    ADDR_ASSERT(pIn->pTileInfo);
++    ADDR_TILEINFO tileInfo = *pIn->pTileInfo;
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
++
++    if (UseTileIndex(pIn->tileIndex))
++    {
++        out.tileIndex = pIn->tileIndex;
++        out.macroModeIndex = TileIndexInvalid;
++    }
++    // tileInfo is a local copy passed as both input and output; pIn->pTileInfo is not passed on here.
++    HwlSetupTileInfo(pIn->tileMode,
++                     pIn->flags,
++                     pIn->bpp,
++                     pIn->width,
++                     pIn->height,
++                     pIn->numSamples,
++                     &tileInfo,
++                     &tileInfo,
++                     pIn->tileType,
++                     &out);
++
++    valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode,
++                                               pIn->bpp,
++                                               pIn->flags,
++                                               pIn->mipLevel,
++                                               pIn->numSamples,
++                                               &tileInfo,
++                                               &baseAlign,
++                                               &pitchAlign,
++                                               &heightAlign);
++
++    if (valid)
++    {
++        degrade = (pIn->width < pitchAlign || pIn->height < heightAlign);
++    }
++    else
++    {
++        degrade = TRUE;
++    }
++
++    return degrade;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlDegradeThickTileMode
++*
++*   @brief
++*       Degrade a thick/xthick tile mode to a thinner one and scale the bytes-per-tile value to match
++*
++*   @return
++*       Suitable tile mode
++***************************************************************************************************
++*/
++AddrTileMode EgBasedAddrLib::HwlDegradeThickTileMode(
++    AddrTileMode        baseTileMode,   ///< [in] base tile mode
++    UINT_32             numSlices,      ///< [in] current number of slices
++    UINT_32*            pBytesPerTile   ///< [in/out] pointer to bytes per tile (may be NULL)
++    ) const
++{
++    ADDR_ASSERT(numSlices < ComputeSurfaceThickness(baseTileMode));
++    // If pBytesPerTile is NULL the byte count is a don't-care; 64 is only a placeholder.
++    UINT_32 bytesPerTile = pBytesPerTile != NULL ? *pBytesPerTile : 64;
++
++    AddrTileMode expTileMode = baseTileMode;
++    switch (baseTileMode)
++    {
++        case ADDR_TM_1D_TILED_THICK:
++            expTileMode = ADDR_TM_1D_TILED_THIN1;
++            bytesPerTile >>= 2;
++            break;
++        case ADDR_TM_2D_TILED_THICK:
++            expTileMode = ADDR_TM_2D_TILED_THIN1;
++            bytesPerTile >>= 2;
++            break;
++        case ADDR_TM_3D_TILED_THICK:
++            expTileMode = ADDR_TM_3D_TILED_THIN1;
++            bytesPerTile >>= 2;
++            break;
++        case ADDR_TM_2D_TILED_XTHICK:
++            if (numSlices < ThickTileThickness)
++            {
++                expTileMode = ADDR_TM_2D_TILED_THIN1;
++                bytesPerTile >>= 3;
++            }
++            else
++            {
++                expTileMode = ADDR_TM_2D_TILED_THICK;
++                bytesPerTile >>= 1;
++            }
++            break;
++        case ADDR_TM_3D_TILED_XTHICK:
++            if (numSlices < ThickTileThickness)
++            {
++                expTileMode = ADDR_TM_3D_TILED_THIN1;
++                bytesPerTile >>= 3;
++            }
++            else
++            {
++                expTileMode = ADDR_TM_3D_TILED_THICK;
++                bytesPerTile >>= 1;
++            }
++            break;
++        default:
++            ADDR_ASSERT_ALWAYS();
++            break;
++    }
++
++    if (pBytesPerTile != NULL)
++    {
++        *pBytesPerTile = bytesPerTile;
++    }
++
++    return expTileMode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord
++*
++*   @brief
++*       Compute surface address from given coord (x, y, slice,sample)
++*
++*   @return
++*       Address in bytes
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord(
++    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] coordinate, surface and tiling inputs
++    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] receives bitPosition
++    ) const
++{   // Selects the linear, micro-tiled or macro-tiled helper according to pIn->tileMode
++    UINT_32             x                  = pIn->x;
++    UINT_32             y                  = pIn->y;
++    UINT_32             slice              = pIn->slice;
++    UINT_32             sample             = pIn->sample;
++    UINT_32             bpp                = pIn->bpp;
++    UINT_32             pitch              = pIn->pitch;
++    UINT_32             height             = pIn->height;
++    UINT_32             numSlices          = pIn->numSlices;
++    UINT_32             numSamples         = ((pIn->numSamples == 0) ? 1 : pIn->numSamples);
++    UINT_32             numFrags           = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags);
++    AddrTileMode        tileMode           = pIn->tileMode;
++    AddrTileType        microTileType      = pIn->tileType;
++    BOOL_32             ignoreSE           = pIn->ignoreSE;
++    BOOL_32             isDepthSampleOrder = pIn->isDepth;
++    ADDR_TILEINFO*      pTileInfo          = pIn->pTileInfo;
++
++    UINT_32*            pBitPosition       = &pOut->bitPosition;
++    UINT_64             addr;
++
++#if ADDR_AM_BUILD
++    UINT_32             addr5Bit           = 0;
++    UINT_32             addr5Swizzle       = pIn->addr5Swizzle;
++    BOOL_32             is32ByteTile       = pIn->is32ByteTile;
++#endif
++
++    // ADDR_DEPTH_SAMPLE_ORDER = non-displayable + depth-sample-order, so force the flag on
++    if (microTileType == ADDR_DEPTH_SAMPLE_ORDER)
++    {
++        isDepthSampleOrder = TRUE;
++    }
++
++    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
++    {
++        if (numFrags != numSamples)
++        {
++            numSamples = numFrags; // the tiled helpers below receive this fragment count
++            ADDR_ASSERT(sample < numSamples);
++        }
++
++        /// @note
++        /// 128-bit and thick tiled surfaces don't support display tiling, and a
++        /// mipmap chain must use a single tileType, so the caller must fill tileType correctly
++        if (!IsLinear(pIn->tileMode))
++        {
++            if (bpp >= 128 || ComputeSurfaceThickness(tileMode) > 1)
++            {
++                ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE);
++            }
++        }
++    }
++
++    switch (tileMode)
++    {
++        case ADDR_TM_LINEAR_GENERAL://fall through
++        case ADDR_TM_LINEAR_ALIGNED:
++            addr = ComputeSurfaceAddrFromCoordLinear(x,
++                                                     y,
++                                                     slice,
++                                                     sample,
++                                                     bpp,
++                                                     pitch,
++                                                     height,
++                                                     numSlices,
++                                                     pBitPosition);
++            break;
++        case ADDR_TM_1D_TILED_THIN1://fall through
++        case ADDR_TM_1D_TILED_THICK:
++            addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
++                                                         y,
++                                                         slice,
++                                                         sample,
++                                                         bpp,
++                                                         pitch,
++                                                         height,
++                                                         numSamples,
++                                                         tileMode,
++                                                         microTileType,
++                                                         isDepthSampleOrder,
++                                                         pBitPosition);
++            break;
++        case ADDR_TM_2D_TILED_THIN1:    //fall through
++        case ADDR_TM_2D_TILED_THICK:    //fall through
++        case ADDR_TM_3D_TILED_THIN1:    //fall through
++        case ADDR_TM_3D_TILED_THICK:    //fall through
++        case ADDR_TM_2D_TILED_XTHICK:   //fall through
++        case ADDR_TM_3D_TILED_XTHICK:   //fall through
++        case ADDR_TM_PRT_TILED_THIN1:   //fall through
++        case ADDR_TM_PRT_2D_TILED_THIN1://fall through
++        case ADDR_TM_PRT_3D_TILED_THIN1://fall through
++        case ADDR_TM_PRT_TILED_THICK:   //fall through
++        case ADDR_TM_PRT_2D_TILED_THICK://fall through
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            UINT_32 pipeSwizzle;
++            UINT_32 bankSwizzle;
++
++            if (m_configFlags.useCombinedSwizzle) // derive both swizzles from pIn->tileSwizzle
++            {
++                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
++                                       &bankSwizzle, &pipeSwizzle);
++            }
++            else
++            {
++                pipeSwizzle = pIn->pipeSwizzle;
++                bankSwizzle = pIn->bankSwizzle;
++            }
++
++            addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
++                                                         y,
++                                                         slice,
++                                                         sample,
++                                                         bpp,
++                                                         pitch,
++                                                         height,
++                                                         numSamples,
++                                                         tileMode,
++                                                         microTileType,
++                                                         ignoreSE,
++                                                         isDepthSampleOrder,
++                                                         pipeSwizzle,
++                                                         bankSwizzle,
++                                                         pTileInfo,
++                                                         pBitPosition);
++            break;
++        default:
++            addr = 0; // unhandled tile mode: report address 0
++            ADDR_ASSERT_ALWAYS();
++            break;
++    }
++
++#if ADDR_AM_BUILD
++    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
++    {
++        if (addr5Swizzle && isDepthSampleOrder && is32ByteTile)
++        {
++            UINT_32 tx = x >> 3; // x / 8
++            UINT_32 ty = y >> 3; // y / 8
++            UINT_32 tileBits = ((ty&0x3) << 2) | (tx&0x3);
++
++            tileBits = tileBits & addr5Swizzle;
++            addr5Bit = XorReduce(tileBits, 4);
++
++            addr = addr | static_cast<UINT_64>(addr5Bit << 5); // OR addr5Bit in at bit position 5
++        }
++    }
++#endif
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled
++*
++*   @brief
++*       Computes the surface address and bit position from a
++*       coordinate for 2D tiled (macro tiled) surfaces
++*   @return
++*       The byte address
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled(
++    UINT_32             x,                      ///< [in] x coordinate
++    UINT_32             y,                      ///< [in] y coordinate
++    UINT_32             slice,                  ///< [in] slice index
++    UINT_32             sample,                 ///< [in] sample index
++    UINT_32             bpp,                    ///< [in] bits per pixel
++    UINT_32             pitch,                  ///< [in] surface pitch, in pixels
++    UINT_32             height,                 ///< [in] surface height, in pixels
++    UINT_32             numSamples,             ///< [in] number of samples
++    AddrTileMode        tileMode,               ///< [in] tile mode
++    AddrTileType        microTileType,          ///< [in] micro tiling type
++    BOOL_32             ignoreSE,               ///< [in] TRUE if shader engines can be ignored
++    BOOL_32             isDepthSampleOrder,     ///< [in] TRUE if depth sample ordering is used
++    UINT_32             pipeSwizzle,            ///< [in] pipe swizzle
++    UINT_32             bankSwizzle,            ///< [in] bank swizzle
++    ADDR_TILEINFO*      pTileInfo,              ///< [in] bank structure
++                                                ///  **All fields to be valid on entry**
++    UINT_32*            pBitPosition            ///< [out] bit position, e.g. FMT_1 will use this
++    ) const
++{
++    UINT_64 addr;
++
++    UINT_32 microTileBytes;
++    UINT_32 microTileBits;
++    UINT_32 sampleOffset;
++    UINT_32 pixelIndex;
++    UINT_32 pixelOffset;
++    UINT_32 elementOffset;
++    UINT_32 tileSplitSlice;
++    UINT_32 pipe;
++    UINT_32 bank;
++    UINT_64 sliceBytes;
++    UINT_64 sliceOffset;
++    UINT_32 macroTilePitch;
++    UINT_32 macroTileHeight;
++    UINT_32 macroTilesPerRow;
++    UINT_32 macroTilesPerSlice;
++    UINT_64 macroTileBytes;
++    UINT_32 macroTileIndexX;
++    UINT_32 macroTileIndexY;
++    UINT_64 macroTileOffset;
++    UINT_64 totalOffset;
++    UINT_64 pipeInterleaveMask;
++    UINT_64 bankInterleaveMask;
++    UINT_64 pipeInterleaveOffset;
++    UINT_32 bankInterleaveOffset;
++    UINT_64 offset;
++    UINT_32 tileRowIndex;
++    UINT_32 tileColumnIndex;
++    UINT_32 tileIndex;
++    UINT_32 tileOffset;
++
++    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
++
++    //
++    // Compute the number of pipe-interleave, pipe, bank-interleave and bank bits.
++    //
++    UINT_32 numPipes              = HwlGetPipes(pTileInfo);
++    UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes);
++    UINT_32 numPipeBits           = Log2(numPipes);
++    UINT_32 numBankInterleaveBits = Log2(m_bankInterleave);
++    UINT_32 numBankBits           = Log2(pTileInfo->banks);
++
++    //
++    // Compute the micro tile size, first in bits and then in bytes.
++    //
++    microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;
++
++    microTileBytes = microTileBits / 8;
++    //
++    // Compute the pixel index within the micro tile.
++    //
++    pixelIndex = ComputePixelIndexWithinMicroTile(x,
++                                                  y,
++                                                  slice,
++                                                  bpp,
++                                                  tileMode,
++                                                  microTileType);
++
++    //
++    // Compute the sample offset and pixel offset (both in bits).
++    //
++    if (isDepthSampleOrder)
++    {
++        //
++        // For depth surfaces, samples are stored contiguously for each element, so the sample
++        // offset is the sample number times the element size.
++        //
++        sampleOffset = sample * bpp;
++        pixelOffset  = pixelIndex * bpp * numSamples;
++    }
++    else
++    {
++        //
++        // For color surfaces, all elements for a particular sample are stored contiguously, so
++        // the sample offset is the sample number times the micro tile size divided by the number
++        // of samples.
++        //
++        sampleOffset = sample * (microTileBits / numSamples);
++        pixelOffset  = pixelIndex * bpp;
++    }
++
++    //
++    // Compute the element offset.
++    //
++    elementOffset = pixelOffset + sampleOffset;
++
++    *pBitPosition = static_cast<UINT_32>(elementOffset % 8);
++
++    elementOffset /= 8; // bits -> bytes
++
++    //
++    // Determine if tiles need to be split across slices.
++    //
++    // If the size of the micro tile is larger than the tile split size, then the tile will be
++    // split across multiple slices.
++    //
++    UINT_32 slicesPerTile = 1;
++
++    if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1))
++    {   // tile split is only applied when the micro tile thickness is 1 (not for thick modes)
++
++        //
++        // Compute the number of slices per tile.
++        //
++        slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes;
++
++        //
++        // Compute the tile split slice number for use in rotating the bank.
++        //
++        tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes;
++
++        //
++        // Adjust the element offset to account for the portion of the tile that is being moved to
++        // a new slice.
++        //
++        elementOffset %= pTileInfo->tileSplitBytes;
++
++        //
++        // Clamp microTileBytes to tileSplitBytes, since each split piece of the tile is
++        // addressed as its own slice from here on.
++        //
++        microTileBytes = pTileInfo->tileSplitBytes;
++    }
++    else
++    {
++        tileSplitSlice = 0;
++    }
++
++    //
++    // Compute macro tile pitch and height.
++    //
++    macroTilePitch  =
++        (MicroTileWidth  * pTileInfo->bankWidth  * numPipes) * pTileInfo->macroAspectRatio;
++    macroTileHeight =
++        (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio;
++
++    //
++    // Compute the number of bytes per macro tile. Note: this counts bytes of a single
++    // bank/pipe, hence the division by (numPipes * banks).
++    macroTileBytes =
++        static_cast<UINT_64>(microTileBytes) *
++        (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) /
++        (numPipes * pTileInfo->banks);
++
++    //
++    // Compute the number of macro tiles per row.
++    //
++    macroTilesPerRow = pitch / macroTilePitch;
++
++    //
++    // Compute the offset to the macro tile containing the specified coordinate.
++    //
++    macroTileIndexX = x / macroTilePitch;
++    macroTileIndexY = y / macroTileHeight;
++    macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
++
++    //
++    // Compute the number of macro tiles per slice.
++    //
++    macroTilesPerSlice = macroTilesPerRow  * (height / macroTileHeight);
++
++    //
++    // Compute the slice size.
++    //
++    sliceBytes = macroTilesPerSlice * macroTileBytes;
++
++    //
++    // Compute the slice offset.
++    //
++    sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness));
++
++    //
++    // Compute the tile offset.
++    //
++    tileRowIndex    = (y / MicroTileHeight) % pTileInfo->bankHeight;
++    tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth;
++    tileIndex        = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex;
++    tileOffset       = tileIndex * microTileBytes;
++
++    //
++    // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting
++    // for the pipe and bank bits in the middle of the address.
++    //
++    totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset;
++
++    //
++    // Get the pipe and bank.
++    //
++
++    // For PRT no-rotation tile modes, make x and y relative to the containing macro tile
++    if (IsPrtNoRotationTileMode(tileMode))
++    {
++        x = x % macroTilePitch;
++        y = y % macroTileHeight;
++    }
++
++    pipe = ComputePipeFromCoord(x,
++                                y,
++                                slice,
++                                tileMode,
++                                pipeSwizzle,
++                                ignoreSE,
++                                pTileInfo);
++
++    bank = ComputeBankFromCoord(x,
++                                y,
++                                slice,
++                                tileMode,
++                                bankSwizzle,
++                                tileSplitSlice,
++                                pTileInfo);
++
++
++    //
++    // Split the offset to put some bits below the pipe+bank bits and some above.
++    //
++    pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1;
++    bankInterleaveMask = (1 << numBankInterleaveBits) - 1;
++    pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
++    bankInterleaveOffset = static_cast<UINT_32>((totalOffset >> numPipeInterleaveBits) &
++                                                bankInterleaveMask);
++    offset               =  totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits);
++
++    //
++    // Assemble the address, low to high bits: pipe-interleave offset, pipe, bank-interleave
++    // offset, bank, then the remaining upper offset bits.
++    addr  = pipeInterleaveOffset;
++    // The named intermediates below are to remove /analyze warnings
++    UINT_32 pipeBits            = pipe                 <<  numPipeInterleaveBits;
++    UINT_32 bankInterleaveBits  = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits);
++    UINT_32 bankBits            = bank                 << (numPipeInterleaveBits + numPipeBits +
++                                                           numBankInterleaveBits);
++    UINT_64 offsetBits          = offset               << (numPipeInterleaveBits + numPipeBits +
++                                                           numBankInterleaveBits + numBankBits);
++
++    addr |= pipeBits;
++    addr |= bankInterleaveBits;
++    addr |= bankBits;
++    addr |= offsetBits;
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled
++*
++*   @brief
++*       Computes the surface address and bit position from a coordinate for 1D tiled
++*       (micro tiled) surfaces
++*   @return
++*       The byte address
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled(
++    UINT_32             x,                      ///< [in] x coordinate
++    UINT_32             y,                      ///< [in] y coordinate
++    UINT_32             slice,                  ///< [in] slice index
++    UINT_32             sample,                 ///< [in] sample index
++    UINT_32             bpp,                    ///< [in] bits per pixel
++    UINT_32             pitch,                  ///< [in] pitch, in pixels
++    UINT_32             height,                 ///< [in] height, in pixels
++    UINT_32             numSamples,             ///< [in] number of samples
++    AddrTileMode        tileMode,               ///< [in] tile mode
++    AddrTileType        microTileType,          ///< [in] micro tiling type
++    BOOL_32             isDepthSampleOrder,     ///< [in] TRUE if depth sample ordering is used
++    UINT_32*            pBitPosition            ///< [out] bit position, e.g. FMT_1 will use this
++    ) const
++{
++    UINT_64 addr = 0;
++
++    UINT_32 microTileBytes;
++    UINT_64 sliceBytes;
++    UINT_32 microTilesPerRow;
++    UINT_32 microTileIndexX;
++    UINT_32 microTileIndexY;
++    UINT_32 microTileIndexZ;
++    UINT_64 sliceOffset;
++    UINT_64 microTileOffset;
++    UINT_32 sampleOffset;
++    UINT_32 pixelIndex;
++    UINT_32 pixelOffset;
++
++    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
++
++    //
++    // Compute the micro tile size.
++    //
++    microTileBytes = BITS_TO_BYTES(MicroTilePixels * microTileThickness * bpp * numSamples);
++
++    //
++    // Compute the slice size (pitch is widened to 64 bits before multiplying).
++    //
++    sliceBytes =
++        BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples);
++
++    //
++    // Compute the number of micro tiles per row.
++    //
++    microTilesPerRow = pitch / MicroTileWidth;
++
++    //
++    // Compute the micro tile index.
++    //
++    microTileIndexX = x     / MicroTileWidth;
++    microTileIndexY = y     / MicroTileHeight;
++    microTileIndexZ = slice / microTileThickness;
++
++    //
++    // Compute the slice offset.
++    //
++    sliceOffset = static_cast<UINT_64>(microTileIndexZ) * sliceBytes;
++
++    //
++    // Compute the offset to the micro tile containing the specified coordinate.
++    //
++    microTileOffset = (static_cast<UINT_64>(microTileIndexY) * microTilesPerRow + microTileIndexX) *
++        microTileBytes;
++
++    //
++    // Compute the pixel index within the micro tile.
++    //
++    pixelIndex = ComputePixelIndexWithinMicroTile(x,
++                                                  y,
++                                                  slice,
++                                                  bpp,
++                                                  tileMode,
++                                                  microTileType);
++
++    // Compute the sample offset and pixel offset (both in bits).
++    //
++    if (isDepthSampleOrder)
++    {
++        //
++        // For depth surfaces, samples are stored contiguously for each element, so the sample
++        // offset is the sample number times the element size.
++        //
++        sampleOffset = sample * bpp;
++        pixelOffset = pixelIndex * bpp * numSamples;
++    }
++    else
++    {
++        //
++        // For color surfaces, all elements for a particular sample are stored contiguously, so
++        // the sample offset is the sample number times the micro tile size divided by the number
++        // of samples.
++        //
++        sampleOffset = sample * (microTileBytes*8 / numSamples);
++        pixelOffset = pixelIndex * bpp;
++    }
++
++    //
++    // Split the combined bit offset into a bit position within the byte and a byte offset.
++    //
++
++    UINT_32 elemOffset = sampleOffset + pixelOffset;
++
++    *pBitPosition = elemOffset % 8;
++    elemOffset /= 8;
++
++    //
++    // Combine the slice offset, micro tile offset, sample offset, and pixel offsets.
++    //
++    addr = sliceOffset + microTileOffset + elemOffset;
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputePixelCoordFromOffset
++*
++*   @brief
++*       Compute pixel coordinate from a bit offset inside a micro tile
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::HwlComputePixelCoordFromOffset(
++    UINT_32         offset,             ///< [in] offset inside micro tile in bits
++    UINT_32         bpp,                ///< [in] bits per pixel
++    UINT_32         numSamples,         ///< [in] number of samples
++    AddrTileMode    tileMode,           ///< [in] tile mode
++    UINT_32         tileBase,           ///< [in] base offset within a tile
++    UINT_32         compBits,           ///< [in] component bits actually needed(for planar surface)
++    UINT_32*        pX,                 ///< [out] x coordinate
++    UINT_32*        pY,                 ///< [out] y coordinate
++    UINT_32*        pSlice,             ///< [in,out] slice index; the decoded z is added to it
++    UINT_32*        pSample,            ///< [out] sample index
++    AddrTileType    microTileType,      ///< [in] micro tiling type
++    BOOL_32         isDepthSampleOrder  ///< [in] TRUE if depth sample order in microtile is used
++    ) const
++{
++    UINT_32 x = 0;
++    UINT_32 y = 0;
++    UINT_32 z = 0;
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++    // For planar surface, we adjust offset according to tile base
++    if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder)
++    {
++        offset -= tileBase;
++
++        ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE ||
++                    microTileType == ADDR_DEPTH_SAMPLE_ORDER);
++
++        bpp = compBits; // decode below with the component size instead of the full bpp
++    }
++
++    UINT_32 sampleTileBits;
++    UINT_32 samplePixelBits;
++    UINT_32 pixelIndex;
++
++    if (isDepthSampleOrder)
++    {
++        samplePixelBits = bpp * numSamples; // bits taken by all samples of one pixel
++        pixelIndex = offset / samplePixelBits;
++        *pSample = (offset % samplePixelBits) / bpp;
++    }
++    else
++    {
++        sampleTileBits = MicroTilePixels * bpp * thickness; // pixels per micro tile * bpp * thickness
++        *pSample = offset / sampleTileBits;
++        pixelIndex = (offset % sampleTileBits) / bpp;
++    }
++
++    if (microTileType != ADDR_THICK)
++    {
++        if (microTileType == ADDR_DISPLAYABLE) // displayable
++        {
++            switch (bpp)
++            {
++                case 8:
++                    x = pixelIndex & 0x7;
++                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4));
++                    break;
++                case 16:
++                    x = pixelIndex & 0x7;
++                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3));
++                    break;
++                case 32:
++                    x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0));
++                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2));
++                    break;
++                case 64:
++                    x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
++                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
++                    break;
++                case 128:
++                    x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1));
++                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0));
++                    break;
++                default:
++                    break; // other bpp values leave x and y at 0 (no assert, unlike the switches below)
++            }
++        }
++        else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
++        {
++            x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
++            y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
++        }
++        else if (microTileType == ADDR_ROTATED)
++        {
++            /*
++                8-Bit Elements
++                element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] }
++
++                16-Bit Elements
++                element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] }
++
++                32-Bit Elements
++                element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] }
++
++                64-Bit Elements
++                element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] }
++            */
++            switch(bpp)
++            {
++                case 8:
++                    x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4));
++                    y = pixelIndex & 0x7;
++                    break;
++                case 16:
++                    x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3));
++                    y = pixelIndex & 0x7;
++                    break;
++                case 32:
++                    x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2));
++                    y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0));
++                    break;
++                case 64:
++                    x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
++                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
++                    break;
++                default:
++                    ADDR_ASSERT_ALWAYS();
++                    break;
++            }
++        }
++
++        if (thickness > 1) // thick: z comes from pixelIndex bits 8..6
++        {
++            z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6));
++        }
++    }
++    else
++    {
++        ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1));
++        /*
++            8-Bit Elements and 16-Bit Elements
++            element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] }
++
++            32-Bit Elements
++            element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] }
++
++            64-Bit Elements and 128-Bit Elements
++            element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] }
++
++            The equation to compute the element index for the extra thick tile:
++            element_index[8] = z[2]
++        */
++        switch (bpp)
++        {
++            case 8:
++            case 16: // fall-through
++                x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
++                y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
++                z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4));
++                break;
++            case 32:
++                x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
++                y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
++                z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3));
++                break;
++            case 64:
++            case 128: // fall-through
++                x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0));
++                y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
++                z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2));
++                break;
++            default:
++                ADDR_ASSERT_ALWAYS();
++                break;
++        }
++
++        if (thickness == 8) // extra thick: pixelIndex bit 8 supplies z[2]
++        {
++            z += Bits2Number(3,_BIT(pixelIndex,8),0,0);
++        }
++    }
++
++    *pX = x;
++    *pY = y;
++    *pSlice += z; // added to the caller's existing slice value, not overwritten
++}
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::DispatchComputeSurfaceCoordFromAddr
++*
++*   @brief
++*       Compute (x,y,slice,sample) coordinates from surface address
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::DispatchComputeSurfaceCoordFromAddr(
++    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
++    ) const
++{
++    UINT_64             addr               = pIn->addr;
++    UINT_32             bitPosition        = pIn->bitPosition;
++    UINT_32             bpp                = pIn->bpp;
++    UINT_32             pitch              = pIn->pitch;
++    UINT_32             height             = pIn->height;
++    UINT_32             numSlices          = pIn->numSlices;
++    UINT_32             numSamples         = ((pIn->numSamples == 0) ? 1 : pIn->numSamples);
++    UINT_32             numFrags           = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags);
++    AddrTileMode        tileMode           = pIn->tileMode;
++    UINT_32             tileBase           = pIn->tileBase;
++    UINT_32             compBits           = pIn->compBits;
++    AddrTileType        microTileType      = pIn->tileType;
++    BOOL_32             ignoreSE           = pIn->ignoreSE;
++    BOOL_32             isDepthSampleOrder = pIn->isDepth;
++    ADDR_TILEINFO*      pTileInfo          = pIn->pTileInfo;
++
++    UINT_32*            pX                 = &pOut->x;
++    UINT_32*            pY                 = &pOut->y;
++    UINT_32*            pSlice             = &pOut->slice;
++    UINT_32*            pSample            = &pOut->sample;
++
++    if (microTileType == ADDR_DEPTH_SAMPLE_ORDER)
++    {
++        isDepthSampleOrder = TRUE;
++    }
++
++    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
++    {
++        if (numFrags != numSamples)
++        {
++            numSamples = numFrags;
++        }
++
++        /// @note
++        /// 128 bit/thick tiled surface doesn't support display tiling and
++        /// mipmap chain must have the same tileType, so please fill tileType correctly
++        if (!IsLinear(pIn->tileMode))
++        {
++            if (bpp >= 128 || ComputeSurfaceThickness(tileMode) > 1)
++            {
++                ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE);
++            }
++        }
++    }
++
++    switch (tileMode)
++    {
++        case ADDR_TM_LINEAR_GENERAL://fall through
++        case ADDR_TM_LINEAR_ALIGNED:
++            ComputeSurfaceCoordFromAddrLinear(addr,
++                                              bitPosition,
++                                              bpp,
++                                              pitch,
++                                              height,
++                                              numSlices,
++                                              pX,
++                                              pY,
++                                              pSlice,
++                                              pSample);
++            break;
++        case ADDR_TM_1D_TILED_THIN1://fall through
++        case ADDR_TM_1D_TILED_THICK:
++            ComputeSurfaceCoordFromAddrMicroTiled(addr,
++                                                  bitPosition,
++                                                  bpp,
++                                                  pitch,
++                                                  height,
++                                                  numSamples,
++                                                  tileMode,
++                                                  tileBase,
++                                                  compBits,
++                                                  pX,
++                                                  pY,
++                                                  pSlice,
++                                                  pSample,
++                                                  microTileType,
++                                                  isDepthSampleOrder);
++            break;
++        case ADDR_TM_2D_TILED_THIN1:    //fall through
++        case ADDR_TM_2D_TILED_THICK:    //fall through
++        case ADDR_TM_3D_TILED_THIN1:    //fall through
++        case ADDR_TM_3D_TILED_THICK:    //fall through
++        case ADDR_TM_2D_TILED_XTHICK:   //fall through
++        case ADDR_TM_3D_TILED_XTHICK:   //fall through
++        case ADDR_TM_PRT_TILED_THIN1:   //fall through
++        case ADDR_TM_PRT_2D_TILED_THIN1://fall through
++        case ADDR_TM_PRT_3D_TILED_THIN1://fall through
++        case ADDR_TM_PRT_TILED_THICK:   //fall through
++        case ADDR_TM_PRT_2D_TILED_THICK://fall through
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            UINT_32 pipeSwizzle;
++            UINT_32 bankSwizzle;
++
++            if (m_configFlags.useCombinedSwizzle)
++            {
++                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
++                                       &bankSwizzle, &pipeSwizzle);
++            }
++            else
++            {
++                pipeSwizzle = pIn->pipeSwizzle;
++                bankSwizzle = pIn->bankSwizzle;
++            }
++
++            ComputeSurfaceCoordFromAddrMacroTiled(addr,
++                                                  bitPosition,
++                                                  bpp,
++                                                  pitch,
++                                                  height,
++                                                  numSamples,
++                                                  tileMode,
++                                                  tileBase,
++                                                  compBits,
++                                                  microTileType,
++                                                  ignoreSE,
++                                                  isDepthSampleOrder,
++                                                  pipeSwizzle,
++                                                  bankSwizzle,
++                                                  pTileInfo,
++                                                  pX,
++                                                  pY,
++                                                  pSlice,
++                                                  pSample);
++            break;
++        default:
++            ADDR_ASSERT_ALWAYS();
++    }
++}
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceCoordFromAddrMacroTiled
++*
++*   @brief
++*       Compute surface coordinates from address for macro tiled surface
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::ComputeSurfaceCoordFromAddrMacroTiled(
++    UINT_64             addr,               ///< [in] byte address
++    UINT_32             bitPosition,        ///< [in] bit position
++    UINT_32             bpp,                ///< [in] bits per pixel
++    UINT_32             pitch,              ///< [in] pitch in pixels
++    UINT_32             height,             ///< [in] height in pixels
++    UINT_32             numSamples,         ///< [in] number of samples
++    AddrTileMode        tileMode,           ///< [in] tile mode
++    UINT_32             tileBase,           ///< [in] tile base offset
++    UINT_32             compBits,           ///< [in] component bits (for planar surface)
++    AddrTileType        microTileType,      ///< [in] micro tiling type
++    BOOL_32             ignoreSE,           ///< [in] TRUE if shader engines can be ignored
++    BOOL_32             isDepthSampleOrder, ///< [in] TRUE if depth sample order is used
++    UINT_32             pipeSwizzle,        ///< [in] pipe swizzle
++    UINT_32             bankSwizzle,        ///< [in] bank swizzle
++    ADDR_TILEINFO*      pTileInfo,          ///< [in] bank structure.
++                                            ///  **All fields to be valid on entry**
++    UINT_32*            pX,                 ///< [out] X coord
++    UINT_32*            pY,                 ///< [out] Y coord
++    UINT_32*            pSlice,             ///< [out] slice index
++    UINT_32*            pSample             ///< [out] sample index
++    ) const
++{
++    UINT_32 mx;             // x contribution of the tile index, in pixels
++    UINT_32 my;             // y contribution of the tile index, in pixels
++    UINT_64 tileBits;       // bits of one micro tile per tile-split slice
++    UINT_64 macroTileBits;
++    UINT_32 slices;
++    UINT_32 tileSlices;     // tile-split slice index within the micro tile
++    UINT_64 elementOffset;
++    UINT_64 macroTileIndex;
++    UINT_32 tileIndex;
++    UINT_64 totalOffset;    // bit offset left after the bank/pipe fields are removed
++
++
++    UINT_32 bank;
++    UINT_32 pipe;
++    UINT_32 groupBits = m_pipeInterleaveBytes << 3; // pipe interleave size in bits
++    UINT_32 pipes = HwlGetPipes(pTileInfo);
++    UINT_32 banks = pTileInfo->banks;
++
++    UINT_32 bankInterleave = m_bankInterleave;
++
++    UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition; // full address expressed in bits
++
++    //
++    // Strip the pipe and bank fields from the bit address, leaving a contiguous offset
++    //
++    totalOffset = (addrBits % groupBits) +
++        (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) +
++        (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave;
++
++    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
++
++    UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples;
++
++    UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits);
++    //
++    // Determine if tiles need to be split across slices.
++    //
++    // If the size of the micro tile is larger than the tile split size, then the tile will be
++    // split across multiple slices.
++    //
++    UINT_32 slicesPerTile = 1; //_State->TileSlices
++
++    if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1))
++    {   // tile split is only applied to thin (thickness 1) modes
++
++        //
++        // Compute the number of slices per tile.
++        //
++        slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes;
++    }
++
++    tileBits = microTileBits / slicesPerTile; // micro tile bits per tile-split slice
++
++    // macro tile width in micro tiles (not yet multiplied by MicroTileWidth)
++    UINT_32 macroWidth  = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio;
++    // macro tile height, likewise in micro tiles
++    UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio;
++
++    UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth;
++
++    macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes);
++
++    macroTileIndex = totalOffset / macroTileBits;
++
++    // pitchMacros * height / heightMacros;  macroTilesPerSlice == _State->SliceMacros
++    UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height /
++        (macroHeight * MicroTileWidth); // NOTE(review): Width used for a height; OK only if equal to MicroTileHeight
++
++    slices = static_cast<UINT_32>(macroTileIndex / macroTilesPerSlice);
++
++    *pSlice = static_cast<UINT_32>(slices / slicesPerTile * microTileThickness);
++
++    //
++    // calculate element offset and x[2:0], y[2:0], z[1:0] for thick
++    //
++    tileSlices = slices % slicesPerTile;
++
++    elementOffset  = tileSlices * tileBits;
++    elementOffset += totalOffset % tileBits;
++
++    UINT_32 coordZ = 0;
++
++    HwlComputePixelCoordFromOffset(static_cast<UINT_32>(elementOffset),
++                                   bpp,
++                                   numSamples,
++                                   tileMode,
++                                   tileBase,
++                                   compBits,
++                                   pX,
++                                   pY,
++                                   &coordZ,
++                                   pSample,
++                                   microTileType,
++                                   isDepthSampleOrder);
++
++    macroTileIndex = macroTileIndex % macroTilesPerSlice; // index of the macro tile within its slice
++    *pY += static_cast<UINT_32>(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight);
++    *pX += static_cast<UINT_32>(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth);
++
++    *pSlice += coordZ;
++
++    tileIndex = static_cast<UINT_32>((totalOffset % macroTileBits) / tileBits);
++
++    my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight;
++    mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth;
++
++    *pY += my;
++    *pX += mx;
++
++    bank = ComputeBankFromAddr(addr, banks, pipes);
++    pipe = ComputePipeFromAddr(addr, pipes);
++    // The HWL hook receives pX/pY by pointer; presumably it adds the bank/pipe-derived bits.
++    HwlComputeSurfaceCoord2DFromBankPipe(tileMode,
++                                         pX,
++                                         pY,
++                                         *pSlice,
++                                         bank,
++                                         pipe,
++                                         bankSwizzle,
++                                         pipeSwizzle,
++                                         tileSlices,
++                                         ignoreSE,
++                                         pTileInfo);
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe
++*
++*   @brief
++*       Compute surface x,y coordinates from bank/pipe info
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe(
++    AddrTileMode        tileMode,   ///< [in] tile mode
++    UINT_32             x,          ///< [in] x coordinate
++    UINT_32             y,          ///< [in] y coordinate
++    UINT_32             slice,      ///< [in] slice index
++    UINT_32             bank,       ///< [in] bank number
++    UINT_32             pipe,       ///< [in] pipe number
++    UINT_32             bankSwizzle,///< [in] bank swizzle
++    UINT_32             pipeSwizzle,///< [in] pipe swizzle
++    UINT_32             tileSlices, ///< [in] slices in a micro tile
++    ADDR_TILEINFO*      pTileInfo,  ///< [in] bank structure. **All fields to be valid on entry**
++    CoordFromBankPipe*  pOutput     ///< [out] pointer to extracted x/y bits
++    ) const
++{
++    UINT_32 yBit3 = 0;
++    UINT_32 yBit4 = 0;
++    UINT_32 yBit5 = 0;
++    UINT_32 yBit6 = 0;
++
++    UINT_32 xBit3 = 0;
++    UINT_32 xBit4 = 0;
++    UINT_32 xBit5 = 0;
++
++    UINT_32 tileSplitRotation;
++
++    UINT_32 numPipes = HwlGetPipes(pTileInfo);
++
++    UINT_32 bankRotation = ComputeBankRotation(tileMode,
++                                               pTileInfo->banks, numPipes);
++
++    UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes);
++
++    UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes);
++    UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight);
++
++    // Recover the bank and pipe as they were before rotation and swizzle were applied,
++    // then solve the per-bank-count XOR equations below for the unknown x/y bits.
++    switch (tileMode)
++    {
++        case ADDR_TM_2D_TILED_THIN1:  //fall through
++        case ADDR_TM_2D_TILED_THICK:  //fall through
++        case ADDR_TM_2D_TILED_XTHICK: //fall through
++        case ADDR_TM_3D_TILED_THIN1:  //fall through
++        case ADDR_TM_3D_TILED_THICK:  //fall through
++        case ADDR_TM_3D_TILED_XTHICK:
++            tileSplitRotation = ((pTileInfo->banks / 2) + 1);
++            break;
++        default: // every mode not listed above (the PRT modes included) gets no tile-split rotation
++            tileSplitRotation =  0;
++            break;
++    }
++
++    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
++
++    bank ^= tileSplitRotation * tileSlices; // XOR out the tile-split rotation
++    if (pipeRotation == 0) // modes without pipe rotation
++    {
++        bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle;
++        bank %= pTileInfo->banks;
++        pipe ^= pipeSwizzle; // NOTE(review): pipe is not read again in this function
++    }
++    else
++    {
++        bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle;
++        bank %= pTileInfo->banks;
++        pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle;
++    }
++
++    if (pTileInfo->macroAspectRatio == 1)
++    {
++        switch (pTileInfo->banks)
++        {
++            case 2:
++                yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0);
++                break;
++            case 4:
++                yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0);
++                yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1);
++                break;
++            case 8:
++                yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
++                yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0);
++                yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5;
++                break;
++            case 16:
++                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3);
++                yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2);
++                yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0);
++                yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6;
++                break;
++            default:
++                break;
++        }
++
++    }
++    else if (pTileInfo->macroAspectRatio == 2)
++    {
++        switch (pTileInfo->banks)
++        {
++            case 2: //xBit3 = yBit3^b0
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0);
++                break;
++            case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1);
++                yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1);
++                break;
++            case 8: //xBit4, xBit5, yBit5 are known
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2);
++                yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
++                yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2);
++                break;
++            case 16://x4,x5,x6,y6 are known
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0
++                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3
++                yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2
++                yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1
++                break;
++            default:
++                break;
++        }
++    }
++    else if (pTileInfo->macroAspectRatio == 4)
++    {
++        switch (pTileInfo->banks)
++        {
++            case 4: //yBit3, yBit4
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1);
++                xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0);
++                break;
++            case 8: //xBit5, yBit4, yBit5
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2);
++                yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
++                xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^  _BIT(yBit,2);
++                break;
++            case 16: //xBit5, xBit6, yBit5, yBit6
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6
++                xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6;
++                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6;
++                yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5;
++                break;
++            default:
++                break;
++        }
++    }
++    else if (pTileInfo->macroAspectRatio == 8)
++    {
++        switch (pTileInfo->banks)
++        {
++            case 8: //yBit3, yBit4, yBit5
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5;
++                xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5;
++                xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0);
++                break;
++            case 16: //xBit6, yBit4, yBit5, yBit6
++                xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0
++                xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1
++                xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2
++                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3
++                break;
++            default:
++                break;
++        }
++    }
++    // Bits not assigned for the active banks/macroAspectRatio combination are reported as 0.
++    pOutput->xBits = xBit;
++    pOutput->yBits = yBit;
++
++    pOutput->xBit3 = xBit3;
++    pOutput->xBit4 = xBit4;
++    pOutput->xBit5 = xBit5;
++    pOutput->yBit3 = yBit3;
++    pOutput->yBit4 = yBit4;
++    pOutput->yBit5 = yBit5;
++    pOutput->yBit6 = yBit6;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlExtractBankPipeSwizzle
++*   @brief
++*       Entry of EgBasedAddrLib ExtractBankPipeSwizzle
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlExtractBankPipeSwizzle(
++    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,   ///< [in] input structure
++    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut   ///< [out] output structure
++    ) const
++{
++    // Decode straight into the caller's output fields; this entry always reports ADDR_OK.
++    UINT_32* const pBankOut = &pOut->bankSwizzle;
++    UINT_32* const pPipeOut = &pOut->pipeSwizzle;
++
++    ExtractBankPipeSwizzle(pIn->base256b, pIn->pTileInfo, pBankOut, pPipeOut);
++    return ADDR_OK;
++}
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlCombineBankPipeSwizzle
++*   @brief
++*       Combine bank/pipe swizzle
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlCombineBankPipeSwizzle(
++    UINT_32         bankSwizzle,    ///< [in] bank swizzle
++    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
++    ADDR_TILEINFO*  pTileInfo,      ///< [in] tile info
++    UINT_64         baseAddr,       ///< [in] base address
++    UINT_32*        pTileSwizzle    ///< [out] combined swizzle
++    ) const
++{
++    // Without an output slot there is nothing to compute; report the bad argument.
++    if (!pTileSwizzle)
++    {
++        return ADDR_INVALIDPARAMS;
++    }
++
++    // Pack both swizzles, folded with baseAddr, into one value.
++    const UINT_32 combined = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo);
++
++    *pTileSwizzle = combined;
++
++    return ADDR_OK;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeBaseSwizzle
++*   @brief
++*       Compute base swizzle
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeBaseSwizzle(
++    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,   ///< [in] input structure
++    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut        ///< [out] output structure (tileSwizzle)
++    ) const
++{
++    UINT_32 bankSwizzle = 0;
++    UINT_32 pipeSwizzle = 0;
++    ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
++
++    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
++    ADDR_ASSERT(pIn->pTileInfo);
++
++    /// Legacy misreading of the h/w doc, kept because it is harmless; one row per bank count.
++    static const UINT_8 bankRotationArray[4][16] = {
++        { 0, 0,  0, 0,  0, 0,  0, 0, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_2_BANK
++        { 0, 1,  2, 3,  0, 0,  0, 0, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_4_BANK
++        { 0, 3,  6, 1,  4, 7,  2, 5, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_8_BANK
++        { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK
++    };
++
++    UINT_32 pipes = HwlGetPipes(pTileInfo); // queried once and reused for the pipe mask below
++    UINT_32 banks = pTileInfo ? pTileInfo->banks : 2;
++    UINT_32 hwNumBanks; // row index into bankRotationArray
++
++    // Uses less bank swizzle bits
++    if (pIn->option.reduceBankBit && banks > 2)
++    {
++        banks >>= 1;
++    }
++
++    switch (banks)
++    {
++        case 2:
++            hwNumBanks = 0;
++            break;
++        case 4:
++            hwNumBanks = 1;
++            break;
++        case 8:
++            hwNumBanks = 2;
++            break;
++        case 16:
++            hwNumBanks = 3;
++            break;
++        default:
++            ADDR_ASSERT_ALWAYS();
++            hwNumBanks = 0;
++            break;
++    }
++
++    if (pIn->option.genOption == ADDR_SWIZZLE_GEN_LINEAR)
++    {
++        bankSwizzle = pIn->surfIndex & (banks - 1);
++    }
++    else // (pIn->option.genOption == ADDR_SWIZZLE_GEN_DEFAULT)
++    {
++        bankSwizzle = bankRotationArray[hwNumBanks][pIn->surfIndex & (banks - 1)];
++    }
++
++    if (IsMacro3dTiled(pIn->tileMode)) // pipe swizzle is only generated for 3D macro tile modes
++    {
++        pipeSwizzle = pIn->surfIndex & (pipes - 1);
++    }
++
++    return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle);
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ExtractBankPipeSwizzle
++*   @brief
++*       Extract bank/pipe swizzle from base256b
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::ExtractBankPipeSwizzle(
++    UINT_32         base256b,       ///< [in] input base256b register value
++    ADDR_TILEINFO*  pTileInfo,      ///< [in] 2D tile parameters. Client must provide all data
++    UINT_32*        pBankSwizzle,   ///< [out] bank swizzle
++    UINT_32*        pPipeSwizzle    ///< [out] pipe swizzle
++    ) const
++{
++    UINT_32 pipePart = 0; // both parts stay 0 when base256b carries no swizzle
++    UINT_32 bankPart = 0;
++
++    if (base256b != 0)
++    {
++        const UINT_32 pipeCount      = HwlGetPipes(pTileInfo);
++        const UINT_32 bankMask       = (1 << QLog2(pTileInfo->banks)) - 1;
++        const UINT_32 pipeMask       = (1 << QLog2(pipeCount)) - 1;
++        const UINT_32 groupBytes     = m_pipeInterleaveBytes;
++        const UINT_32 bankInterleave = m_bankInterleave;
++        // The interleave size is shifted by 8 to match base256b's 256-byte units.
++        const UINT_32 groupIndex     = base256b / (groupBytes >> 8);
++
++        pipePart = groupIndex & pipeMask;
++
++        bankPart = (groupIndex / pipeCount / bankInterleave) & bankMask;
++    }
++
++    *pPipeSwizzle = pipePart;
++    *pBankSwizzle = bankPart;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::GetBankPipeSwizzle
++*   @brief
++*       Combine bank/pipe swizzle
++*   @return
++*       Base256b bits (only filled bank/pipe bits)
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::GetBankPipeSwizzle(
++    UINT_32         bankSwizzle,    ///< [in] bank swizzle
++    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
++    UINT_64         baseAddr,       ///< [in] base address
++    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
++    ) const
++{
++    const UINT_32 pipeShift       = QLog2(HwlGetPipes(pTileInfo));
++    const UINT_32 interleaveShift = QLog2(m_bankInterleave);
++    // Pipe swizzle takes the low bits; bank swizzle sits above the interleave and pipe bits.
++    const UINT_32 packed = ((bankSwizzle << interleaveShift) << pipeShift) + pipeSwizzle;
++    // Scale by the pipe interleave size and fold the result into the base address.
++    const UINT_64 swizzledAddr = baseAddr ^ (packed * m_pipeInterleaveBytes);
++
++    // Drop the low 8 bits so the value is in 256-byte (base256b) units.
++    return static_cast<UINT_32>(swizzledAddr >> 8);
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeSliceTileSwizzle
++*   @brief
++*       Compute cubemap/3d texture faces/slices tile swizzle
++*   @return
++*       Tile swizzle
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputeSliceTileSwizzle(
++    AddrTileMode        tileMode,       ///< [in] Tile mode
++    UINT_32             baseSwizzle,    ///< [in] Base swizzle
++    UINT_32             slice,          ///< [in] Slice index, Cubemap face index, 0 means +X
++    UINT_64             baseAddr,       ///< [in] Base address
++    ADDR_TILEINFO* pTileInfo       ///< [in] Bank structure
++    ) const
++{
++    // Only macro tile modes carry a swizzle; any other mode reports 0.
++    if (!IsMacroTiled(tileMode))
++    {
++        return 0;
++    }
++
++    // Slices inside one thick micro tile share a swizzle, so count in thickness units.
++    const UINT_32 sliceGroup = slice / ComputeSurfaceThickness(tileMode);
++
++    const UINT_32 pipeCount = HwlGetPipes(pTileInfo);
++    const UINT_32 bankCount = pTileInfo->banks;
++
++    const UINT_32 pipeStep = ComputePipeRotation(tileMode, pipeCount);
++    const UINT_32 bankStep = ComputeBankRotation(tileMode, bankCount, pipeCount);
++
++    // Start from the base swizzle's bank/pipe parts; a zero base leaves both at 0.
++    UINT_32 bankPart = 0;
++    UINT_32 pipePart = 0;
++
++    if (baseSwizzle != 0)
++    {
++        ExtractBankPipeSwizzle(baseSwizzle,
++                               pTileInfo,
++                               &bankPart,
++                               &pipePart);
++    }
++
++    if (pipeStep != 0)
++    {
++        // 3D mode: pipes advance per slice group, banks by their step divided by pipeCount.
++        pipePart = (pipePart + sliceGroup * pipeStep) % pipeCount;
++        bankPart = (bankPart + sliceGroup * bankStep / pipeCount) % bankCount;
++    }
++    else
++    {
++        // 2D mode: only the bank part advances.
++        bankPart = (bankPart + sliceGroup * bankStep) % bankCount;
++    }
++
++    // Recombine the adjusted parts with the base address.
++    const UINT_32 sliceSwizzle = GetBankPipeSwizzle(bankPart,
++                                                    pipePart,
++                                                    baseAddr,
++                                                    pTileInfo);
++
++    return sliceSwizzle;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeQbStereoRightSwizzle
++*
++*   @brief
++*       Compute right eye swizzle
++*   @return
++*       swizzle
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::HwlComputeQbStereoRightSwizzle(
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo  ///< [in] Surface info, must be valid
++    ) const
++{
++    UINT_32 rightSwizzle = 0;
++
++    // The left eye is assumed to use swizzle 0; only stereo macro-tiled surfaces need more.
++    if (!IsMacroTiled(pInfo->tileMode) || !pInfo->pStereoInfo || !pInfo->pTileInfo)
++    {
++        return rightSwizzle;
++    }
++
++    // Bank at x = 0, y = height, slice 0, with no bank swizzle and no tile split slice.
++    const UINT_32 rightEyeBank =
++        ComputeBankFromCoord(0, pInfo->height, 0, pInfo->tileMode, 0, 0, pInfo->pTileInfo);
++    if (rightEyeBank != 0)
++    {
++        HwlCombineBankPipeSwizzle(rightEyeBank, 0, pInfo->pTileInfo, 0, &rightSwizzle);
++    }
++    return rightSwizzle;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeBankFromCoord
++*
++*   @brief
++*       Compute bank number from coordinates
++*   @return
++*       Bank number
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputeBankFromCoord(
++    UINT_32         x,              ///< [in] x coordinate
++    UINT_32         y,              ///< [in] y coordinate
++    UINT_32         slice,          ///< [in] slice index
++    AddrTileMode    tileMode,       ///< [in] tile mode
++    UINT_32         bankSwizzle,    ///< [in] bank swizzle
++    UINT_32         tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the
++                                    ///  tile split size, then the pixel will be moved to a separate
++                                    ///  slice. This value equals pixelOffset / tileSplitBytes
++                                    ///  in this case. Otherwise this is 0.
++    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
++    ) const
++{
++    UINT_32 pipes = HwlGetPipes(pTileInfo);
++    UINT_32 bankBit0 = 0;
++    UINT_32 bankBit1 = 0;
++    UINT_32 bankBit2 = 0;
++    UINT_32 bankBit3 = 0;
++    UINT_32 sliceRotation;
++    UINT_32 tileSplitRotation;
++    UINT_32 bank;
++    UINT_32 numBanks    = pTileInfo->banks;
++    UINT_32 bankWidth   = pTileInfo->bankWidth;
++    UINT_32 bankHeight  = pTileInfo->bankHeight;
++
++    UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes); // x in units of (bankWidth * pipes) micro tiles
++    UINT_32 ty = y / MicroTileHeight / bankHeight;         // y in units of bankHeight micro tiles
++
++    UINT_32 x3 = _BIT(tx,0);
++    UINT_32 x4 = _BIT(tx,1);
++    UINT_32 x5 = _BIT(tx,2);
++    UINT_32 x6 = _BIT(tx,3);
++    UINT_32 y3 = _BIT(ty,0);
++    UINT_32 y4 = _BIT(ty,1);
++    UINT_32 y5 = _BIT(ty,2);
++    UINT_32 y6 = _BIT(ty,3);
++
++    switch (numBanks)
++    {
++        case 16:
++            bankBit0 = x3 ^ y6;
++            bankBit1 = x4 ^ y5 ^ y6;
++            bankBit2 = x5 ^ y4;
++            bankBit3 = x6 ^ y3;
++            break;
++        case 8:
++            bankBit0 = x3 ^ y5;
++            bankBit1 = x4 ^ y4 ^ y5;
++            bankBit2 = x5 ^ y3;
++            break;
++        case 4:
++            bankBit0 = x3 ^ y4;
++            bankBit1 = x4 ^ y3;
++            break;
++        case 2:
++            bankBit0 = x3 ^ y3;
++            break;
++        default:
++            ADDR_ASSERT_ALWAYS();
++            break;
++    }
++
++    bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3);
++
++    // Presumably equivalent to Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0).
++
++    bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo);
++    //
++    // Compute bank rotation for the slice.
++    //
++    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
++
++    switch (tileMode)
++    {
++        case ADDR_TM_2D_TILED_THIN1:  // fall through
++        case ADDR_TM_2D_TILED_THICK:  // fall through
++        case ADDR_TM_2D_TILED_XTHICK:
++            sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness);
++            break;
++        case ADDR_TM_3D_TILED_THIN1:  // fall through
++        case ADDR_TM_3D_TILED_THICK:  // fall through
++        case ADDR_TM_3D_TILED_XTHICK:
++            sliceRotation =
++                Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes;
++            break;
++        default: // NOTE(review): PRT 2D/3D modes get no slice rotation here - confirm intended
++            sliceRotation =  0;
++            break;
++    }
++
++
++    //
++    // Compute bank rotation for the tile split slice.
++    //
++    // The sample slice will be non-zero if samples must be split across multiple slices.
++    // This situation arises when the micro tile size multiplied by the number of samples exceeds
++    // the split size (set in GB_ADDR_CONFIG).
++    //
++    switch (tileMode)
++    {
++        case ADDR_TM_2D_TILED_THIN1: //fall through
++        case ADDR_TM_3D_TILED_THIN1: //fall through
++        case ADDR_TM_PRT_2D_TILED_THIN1: //fall through
++        case ADDR_TM_PRT_3D_TILED_THIN1: // last label of the group, shares the statement below
++            tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice;
++            break;
++        default:
++            tileSplitRotation =  0;
++            break;
++    }
++
++    //
++    // Apply bank rotation for the slice and tile split slice.
++    //
++    bank ^= bankSwizzle + sliceRotation;
++    bank ^= tileSplitRotation;
++
++    bank &= (numBanks - 1); // keep only the bits that index a bank
++
++    return bank;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeBankFromAddr
++*
++*   @brief
++*       Compute the bank number from an address
++*   @return
++*       Bank number
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputeBankFromAddr(
++    UINT_64 addr,       ///< [in] address
++    UINT_32 numBanks,   ///< [in] number of banks
++    UINT_32 numPipes    ///< [in] number of pipes
++    ) const
++{
++    //
++    // Address LSB layout, most significant field first:
++    //   bank | bankInterleave | pipe | pipeInterleave
++    //
++    // The pipe interleave, pipe, and bank interleave fields sit below the bank field, so
++    // shifting them off (the Log2 of their combined size) brings the bank bits down to
++    // bit 0, where they are masked.
++    //
++    const UINT_32 bankShift = Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave);
++
++    // Widened before masking so the AND happens on the full 64-bit address.
++    const UINT_64 bankMask = numBanks - 1;
++
++    const UINT_64 bankField = (addr >> bankShift) & bankMask;
++    return static_cast<UINT_32>(bankField);
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputePipeRotation
++*
++*   @brief
++*       Compute pipe rotation value; it is non-zero only for the 3D tiled modes
++*   @return
++*       Pipe rotation: 1 if numPipes < 4, else (numPipes / 2 - 1) for 3D modes; 0 for other modes
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputePipeRotation(
++    AddrTileMode tileMode,  ///< [in] tile mode
++    UINT_32      numPipes   ///< [in] number of pipes
++    ) const
++{
++   UINT_32 rotation;
++
++    switch (tileMode)
++    {
++        case ADDR_TM_3D_TILED_THIN1:        //fall through
++        case ADDR_TM_3D_TILED_THICK:        //fall through
++        case ADDR_TM_3D_TILED_XTHICK:       //fall through
++        case ADDR_TM_PRT_3D_TILED_THIN1:    //fall through
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            rotation = (numPipes < 4) ? 1 : (numPipes / 2 - 1);
++            break;
++        default:
++            rotation = 0;
++    }
++
++    return rotation;
++}
++
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeBankRotation
++*
++*   @brief
++*       Compute bank rotation value
++*   @return
++*       Bank rotation (derived from numBanks for 2D modes, from numPipes for 3D modes, else 0)
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputeBankRotation(
++    AddrTileMode tileMode,  ///< [in] tile mode
++    UINT_32      numBanks,  ///< [in] number of banks
++    UINT_32      numPipes   ///< [in] number of pipes
++    ) const
++{
++    UINT_32 rotation;
++
++    switch (tileMode)
++    {
++        case ADDR_TM_2D_TILED_THIN1: // fall through
++        case ADDR_TM_2D_TILED_THICK: // fall through
++        case ADDR_TM_2D_TILED_XTHICK:
++        case ADDR_TM_PRT_2D_TILED_THIN1:
++        case ADDR_TM_PRT_2D_TILED_THICK:
++            // Rotate banks per Z-slice by 1 for 4-bank or 3 for 8-bank
++            rotation =  numBanks / 2 - 1;
++            break;
++        case ADDR_TM_3D_TILED_THIN1: // fall through
++        case ADDR_TM_3D_TILED_THICK: // fall through
++        case ADDR_TM_3D_TILED_XTHICK:
++        case ADDR_TM_PRT_3D_TILED_THIN1:
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            rotation = (numPipes < 4) ? 1 : (numPipes / 2 - 1);    // rotate pipes & banks
++            break;
++        default:
++            rotation = 0;
++    }
++
++    return rotation;
++}
++
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeHtileBytes
++*
++*   @brief
++*       Compute htile size in bytes
++*
++*   @return
++*       Htile size in bytes for all slices, aligned to HtileCacheLineSize * m_pipes
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::ComputeHtileBytes(
++    UINT_32 pitch,        ///< [in] pitch
++    UINT_32 height,       ///< [in] height
++    UINT_32 bpp,          ///< [in] bits per pixel
++    BOOL_32 isLinear,     ///< [in] if it is linear mode (not referenced in this function)
++    UINT_32 numSlices,    ///< [in] number of slices
++    UINT_64* sliceBytes,  ///< [out] bytes per slice (aligned only if useHtileSliceAlign is set)
++    UINT_32 baseAlign     ///< [in] base alignment (not referenced in this function)
++    ) const
++{
++    UINT_64 surfBytes;
++
++    const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits);
++
++    *sliceBytes = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp / 64);
++
++    if (m_configFlags.useHtileSliceAlign)
++    {
++        // Align each slice to HtileCacheLineSize * m_pipes first, then scale by numSlices
++        *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes);
++        surfBytes  = *sliceBytes * numSlices;
++    }
++    else
++    {
++        // Scale by numSlices first, then align the total to HtileCacheLineSize * m_pipes
++        surfBytes  = *sliceBytes * numSlices;
++        surfBytes  = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes);
++    }
++
++    return surfBytes;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::DispatchComputeFmaskInfo
++*
++*   @brief
++*       Compute fmask sizes, including padded pitch, height, slices and total size in bytes,
++*       and also output suitable tile mode and alignments. Results are returned
++*       through output parameters.
++*
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::DispatchComputeFmaskInfo(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut)  ///< [out] output structure
++{
++    ADDR_E_RETURNCODE retCode = ADDR_OK;
++
++    ADDR_COMPUTE_SURFACE_INFO_INPUT  surfIn     = {0};
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut    = {0};
++
++    // Setup input structure
++    surfIn.tileMode          = pIn->tileMode;
++    surfIn.width             = pIn->pitch;
++    surfIn.height            = pIn->height;
++    surfIn.numSlices         = pIn->numSlices;
++    surfIn.pTileInfo         = pIn->pTileInfo;
++    surfIn.tileType          = ADDR_NON_DISPLAYABLE;
++    surfIn.flags.fmask       = 1;
++
++    // Setup output structure
++    surfOut.pTileInfo       = pOut->pTileInfo;
++
++    // Setup hwl specific fields
++    HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut);
++
++    surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples);
++
++    // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples
++    surfOut.numSamples = surfIn.numSamples;
++
++    retCode = HwlComputeSurfaceInfo(&surfIn, &surfOut);
++
++    // Save bpp field for surface dump support
++    surfOut.bpp = surfIn.bpp;
++
++    if (retCode == ADDR_OK)
++    {
++        pOut->bpp               = surfOut.bpp;
++        pOut->pitch             = surfOut.pitch;
++        pOut->height            = surfOut.height;
++        pOut->numSlices         = surfOut.depth;
++        pOut->fmaskBytes        = surfOut.surfSize;
++        pOut->baseAlign         = surfOut.baseAlign;
++        pOut->pitchAlign        = surfOut.pitchAlign;
++        pOut->heightAlign       = surfOut.heightAlign;
++
++        if (surfOut.depth > 1)
++        {
++            // For fmask, expNumSlices is stored in depth.
++            pOut->sliceSize = surfOut.surfSize / surfOut.depth;
++        }
++        else
++        {
++            pOut->sliceSize = surfOut.surfSize;
++        }
++
++        // Save numSamples field for surface dump support
++        pOut->numSamples        = surfOut.numSamples;
++
++        HwlFmaskPostThunkSurfInfo(&surfOut, pOut);
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeFmaskInfo
++*   @brief
++*       Entry of EgBasedAddrLib ComputeFmaskInfo
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskInfo(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut   ///< [out] output structure
++    )
++{
++    ADDR_E_RETURNCODE retCode = ADDR_OK;
++
++    ADDR_TILEINFO tileInfo = {0};
++
++    // Use internal tile info if pOut does not have a valid pTileInfo
++    if (pOut->pTileInfo == NULL)
++    {
++        pOut->pTileInfo = &tileInfo;
++    }
++
++    retCode = DispatchComputeFmaskInfo(pIn, pOut);
++
++    if (retCode == ADDR_OK)
++    {
++        pOut->tileIndex =
++            HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE,
++                                  pOut->tileIndex);
++    }
++
++    // Reset pTileInfo to NULL if the internal (stack-local) tile info was used
++    if (pOut->pTileInfo == &tileInfo)
++    {
++        pOut->pTileInfo = NULL;
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeFmaskAddrFromCoord
++*   @brief
++*       Entry of EgBasedAddrLib ComputeFmaskAddrFromCoord
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_OK with pOut untouched when ADDR_AM_BUILD is not enabled
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskAddrFromCoord(
++    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE retCode = ADDR_OK;
++
++#if ADDR_AM_BUILD
++    if ((pIn->x > pIn->pitch)               ||
++        (pIn->y > pIn->height)              ||
++        (pIn->numSamples > m_maxSamples)    ||
++        (pIn->sample >= m_maxSamples))
++    {
++        retCode = ADDR_INVALIDPARAMS;
++    }
++    else
++    {
++        pOut->addr = DispatchComputeFmaskAddrFromCoord(pIn, pOut);
++    }
++#endif
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeFmaskCoordFromAddr
++*   @brief
++*       Entry of EgBasedAddrLib ComputeFmaskCoordFromAddr
++*   @return
++*       ADDR_E_RETURNCODE; ADDR_OK with pOut untouched when ADDR_AM_BUILD is not enabled
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskCoordFromAddr(
++    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE retCode = ADDR_OK;
++
++#if ADDR_AM_BUILD
++    if ((pIn->bitPosition >= 8) ||
++        (pIn->numSamples > m_maxSamples))
++    {
++        retCode = ADDR_INVALIDPARAMS;
++    }
++    else
++    {
++        DispatchComputeFmaskCoordFromAddr(pIn, pOut);
++    }
++#endif
++
++    return retCode;
++}
++
++#if ADDR_AM_BUILD
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::DispatchComputeFmaskAddrFromCoord
++*
++*   @brief
++*       Computes the FMASK address and bit position from a coordinate.
++*   @return
++*       The byte address (0, with bit position 0, for tile modes not handled by the switch)
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::DispatchComputeFmaskAddrFromCoord(
++    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    UINT_32             x                 = pIn->x;
++    UINT_32             y                 = pIn->y;
++    UINT_32             slice             = pIn->slice;
++    UINT_32             sample            = pIn->sample;
++    UINT_32             plane             = pIn->plane;
++    UINT_32             pitch             = pIn->pitch;
++    UINT_32             height            = pIn->height;
++    UINT_32             numSamples        = pIn->numSamples;
++    AddrTileMode        tileMode          = pIn->tileMode;
++    BOOL_32             ignoreSE          = pIn->ignoreSE;
++    ADDR_TILEINFO*      pTileInfo         = pIn->pTileInfo;
++    BOOL_32             resolved          = pIn->resolved;
++
++    UINT_32* pBitPosition = &pOut->bitPosition;
++    UINT_64 addr          = 0;
++
++    ADDR_ASSERT(numSamples > 1);
++    ADDR_ASSERT(ComputeSurfaceThickness(tileMode) == 1);
++
++    switch (tileMode)
++    {
++        case ADDR_TM_1D_TILED_THIN1:
++            addr = ComputeFmaskAddrFromCoordMicroTiled(x,
++                                                       y,
++                                                       slice,
++                                                       sample,
++                                                       plane,
++                                                       pitch,
++                                                       height,
++                                                       numSamples,
++                                                       tileMode,
++                                                       resolved,
++                                                       pBitPosition);
++            break;
++        case ADDR_TM_2D_TILED_THIN1: //fall through
++        case ADDR_TM_3D_TILED_THIN1:
++            UINT_32 pipeSwizzle;
++            UINT_32 bankSwizzle;
++
++            if (m_configFlags.useCombinedSwizzle)
++            {
++                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
++                                       &bankSwizzle, &pipeSwizzle);
++            }
++            else
++            {
++                pipeSwizzle = pIn->pipeSwizzle;
++                bankSwizzle = pIn->bankSwizzle;
++            }
++
++            addr = ComputeFmaskAddrFromCoordMacroTiled(x,
++                                                       y,
++                                                       slice,
++                                                       sample,
++                                                       plane,
++                                                       pitch,
++                                                       height,
++                                                       numSamples,
++                                                       tileMode,
++                                                       pipeSwizzle,
++                                                       bankSwizzle,
++                                                       ignoreSE,
++                                                       pTileInfo,
++                                                       resolved,
++                                                       pBitPosition);
++            break;
++        default:
++            *pBitPosition = 0;
++            break;
++    }
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeFmaskAddrFromCoordMicroTiled
++*
++*   @brief
++*       Computes the FMASK address and bit position from a coordinate for 1D tiled (micro
++*       tiled)
++*   @return
++*       The byte address
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::ComputeFmaskAddrFromCoordMicroTiled(
++    UINT_32             x,              ///< [in] x coordinate
++    UINT_32             y,              ///< [in] y coordinate
++    UINT_32             slice,          ///< [in] slice index
++    UINT_32             sample,         ///< [in] sample number
++    UINT_32             plane,          ///< [in] plane number
++    UINT_32             pitch,          ///< [in] surface pitch in pixels
++    UINT_32             height,         ///< [in] surface height in pixels
++    UINT_32             numSamples,     ///< [in] number of samples
++    AddrTileMode        tileMode,       ///< [in] tile mode
++    BOOL_32             resolved,       ///< [in] TRUE if this is for resolved fmask
++    UINT_32*            pBitPosition    ///< [out] pointer to returned bit position
++    ) const
++{
++    UINT_64 addr = 0;
++    UINT_32 effectiveBpp;
++    UINT_32 effectiveSamples;
++
++    //
++    // 2xAA use the same layout as 4xAA
++    //
++    if (numSamples == 2)
++    {
++        numSamples = 4;
++    }
++
++    //
++    // Compute the number of planes.
++    //
++    if (!resolved)
++    {
++        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);;
++        effectiveBpp = numSamples;
++
++        //
++        // Compute the address just like a color surface with numSamples bits per element and
++        // numPlanes samples.
++        //
++        addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
++                                                     y,
++                                                     slice,
++                                                     plane, // sample
++                                                     effectiveBpp,
++                                                     pitch,
++                                                     height,
++                                                     effectiveSamples,
++                                                     tileMode,
++                                                     ADDR_NON_DISPLAYABLE,
++                                                     FALSE,
++                                                     pBitPosition);
++
++        //
++        // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
++        //
++
++        //
++        // Compute the pixel index within the micro tile
++        //
++        UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x % 8,
++                                                              y % 8,
++                                                              slice,
++                                                              1,
++                                                              tileMode,
++                                                              ADDR_NON_DISPLAYABLE);
++
++        *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1);
++
++        UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
++
++        addr = bitAddr / 8;
++    }
++    else
++    {
++        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
++        effectiveSamples = 1;
++
++        //
++        // Compute the address just like a single-sample color surface, using the resolved fmask
++        // bpp (ComputeFmaskResolvedBppFromNumSamples) as bits per element.
++        //
++        addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
++                                                     y,
++                                                     slice,
++                                                     sample,
++                                                     effectiveBpp,
++                                                     pitch,
++                                                     height,
++                                                     effectiveSamples,
++                                                     tileMode,
++                                                     ADDR_NON_DISPLAYABLE,
++                                                     TRUE,
++                                                     pBitPosition);
++    }
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeFmaskAddrFromCoordMacroTiled
++*
++*   @brief
++*       Computes the FMASK address and bit position from a coordinate for 2D tiled (macro
++*       tiled)
++*   @return
++*       The byte address
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::ComputeFmaskAddrFromCoordMacroTiled(
++    UINT_32             x,              ///< [in] x coordinate
++    UINT_32             y,              ///< [in] y coordinate
++    UINT_32             slice,          ///< [in] slice index
++    UINT_32             sample,         ///< [in] sample number
++    UINT_32             plane,          ///< [in] plane number
++    UINT_32             pitch,          ///< [in] surface pitch in pixels
++    UINT_32             height,         ///< [in] surface height in pixels
++    UINT_32             numSamples,     ///< [in] number of samples
++    AddrTileMode        tileMode,       ///< [in] tile mode
++    UINT_32             pipeSwizzle,    ///< [in] pipe swizzle
++    UINT_32             bankSwizzle,    ///< [in] bank swizzle
++    BOOL_32             ignoreSE,       ///< [in] TRUE if ignore shader engine
++    ADDR_TILEINFO*      pTileInfo,      ///< [in] bank structure.**All fields to be valid on entry**
++    BOOL_32             resolved,       ///< [in] TRUE if this is for resolved fmask
++    UINT_32*            pBitPosition    ///< [out] pointer to returned bit position
++    ) const
++{
++    UINT_64 addr = 0;
++    UINT_32 effectiveBpp;
++    UINT_32 effectiveSamples;
++
++    //
++    // 2xAA use the same layout as 4xAA
++    //
++    if (numSamples == 2)
++    {
++        numSamples = 4;
++    }
++
++    //
++    // Compute the number of planes.
++    //
++    if (!resolved)
++    {
++        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
++        effectiveBpp = numSamples;
++
++        //
++        // Compute the address just like a color surface with numSamples bits per element and
++        // numPlanes samples.
++        //
++        addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
++                                                     y,
++                                                     slice,
++                                                     plane, // sample
++                                                     effectiveBpp,
++                                                     pitch,
++                                                     height,
++                                                     effectiveSamples,
++                                                     tileMode,
++                                                     ADDR_NON_DISPLAYABLE,// isdisp
++                                                     ignoreSE,// ignore_shader
++                                                     FALSE,// depth_sample_order
++                                                     pipeSwizzle,
++                                                     bankSwizzle,
++                                                     pTileInfo,
++                                                     pBitPosition);
++
++        //
++        // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
++        //
++
++
++        //
++        // Compute the pixel index within the micro tile
++        //
++        UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x ,
++                                                              y ,
++                                                              slice,
++                                                              effectiveBpp,
++                                                              tileMode,
++                                                              ADDR_NON_DISPLAYABLE);
++
++        *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1);
++
++        UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
++
++        addr = bitAddr / 8;
++
++    }
++    else
++    {
++        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
++        effectiveSamples = 1;
++
++        //
++        // Compute the address just like a single-sample color surface, using the resolved fmask
++        // bpp (ComputeFmaskResolvedBppFromNumSamples) as bits per element.
++        //
++        addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
++                                                     y,
++                                                     slice,
++                                                     sample,
++                                                     effectiveBpp,
++                                                     pitch,
++                                                     height,
++                                                     effectiveSamples,
++                                                     tileMode,
++                                                     ADDR_NON_DISPLAYABLE,
++                                                     ignoreSE,
++                                                     TRUE,
++                                                     pipeSwizzle,
++                                                     bankSwizzle,
++                                                     pTileInfo,
++                                                     pBitPosition);
++    }
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeFmaskCoordFromAddrMicroTiled
++*
++*   @brief
++*       Compute (x,y,slice,sample,plane) coordinates from fmask address (micro tiled)
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::ComputeFmaskCoordFromAddrMicroTiled(
++    UINT_64             addr,       ///< [in] byte address
++    UINT_32             bitPosition,///< [in] bit position
++    UINT_32             pitch,      ///< [in] pitch in pixels
++    UINT_32             height,     ///< [in] height in pixels
++    UINT_32             numSamples, ///< [in] number of samples (of color buffer)
++    AddrTileMode        tileMode,   ///< [in] tile mode
++    BOOL_32             resolved,   ///< [in] TRUE if it is resolved fmask
++    UINT_32*            pX,         ///< [out] X coord
++    UINT_32*            pY,         ///< [out] Y coord
++    UINT_32*            pSlice,     ///< [out] slice index
++    UINT_32*            pSample,    ///< [out] sample index (NULL-checked here only if !resolved)
++    UINT_32*            pPlane      ///< [out] plane index (not written when resolved)
++    ) const
++{
++    UINT_32 effectiveBpp;
++    UINT_32 effectiveSamples;
++
++    // 2xAA use the same layout as 4xAA
++    if (numSamples == 2)
++    {
++        numSamples = 4;
++    }
++
++    if (!resolved)
++    {
++        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
++        effectiveBpp  = numSamples;
++
++        ComputeSurfaceCoordFromAddrMicroTiled(addr,
++                                              bitPosition,
++                                              effectiveBpp,
++                                              pitch,
++                                              height,
++                                              effectiveSamples,
++                                              tileMode,
++                                              0, // tileBase
++                                              0, // compBits
++                                              pX,
++                                              pY,
++                                              pSlice,
++                                              pPlane,
++                                              ADDR_NON_DISPLAYABLE, // microTileType
++                                              FALSE  // isDepthSampleOrder
++                                              );
++
++
++        if ( pSample )
++        {
++            *pSample = bitPosition % numSamples;
++        }
++    }
++    else
++    {
++        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
++        effectiveSamples = 1;
++
++        ComputeSurfaceCoordFromAddrMicroTiled(addr,
++                                              bitPosition,
++                                              effectiveBpp,
++                                              pitch,
++                                              height,
++                                              effectiveSamples,
++                                              tileMode,
++                                              0,     // tileBase
++                                              0,     // compBits
++                                              pX,
++                                              pY,
++                                              pSlice,
++                                              pSample,
++                                              ADDR_NON_DISPLAYABLE, // microTileType
++                                              TRUE   // isDepthSampleOrder
++                                              );
++    }
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeFmaskCoordFromAddrMacroTiled
++*
++*   @brief
++*       Compute (x,y,slice,sample,plane) coordinates from
++*       fmask address (macro tiled)
++*   @return
++*       N/A
++*
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::ComputeFmaskCoordFromAddrMacroTiled(
++    UINT_64             addr,       ///< [in] byte address
++    UINT_32             bitPosition,///< [in] bit position
++    UINT_32             pitch,      ///< [in] pitch in pixels
++    UINT_32             height,     ///< [in] height in pixels
++    UINT_32             numSamples, ///< [in] number of samples (of color buffer)
++    AddrTileMode        tileMode,   ///< [in] tile mode
++    UINT_32             pipeSwizzle,///< [in] pipe swizzle
++    UINT_32             bankSwizzle,///< [in] bank swizzle
++    BOOL_32             ignoreSE,   ///< [in] TRUE if ignore shader engine
++    ADDR_TILEINFO*      pTileInfo,  ///< [in] bank structure. **All fields to be valid on entry**
++    BOOL_32             resolved,   ///< [in] TRUE if it is resolved fmask
++    UINT_32*            pX,         ///< [out] X coord
++    UINT_32*            pY,         ///< [out] Y coord
++    UINT_32*            pSlice,     ///< [out] slice index
++    UINT_32*            pSample,    ///< [out] sample index (NULL-checked here only if !resolved)
++    UINT_32*            pPlane      ///< [out] plane index (not written when resolved)
++    ) const
++{
++    UINT_32 effectiveBpp;
++    UINT_32 effectiveSamples;
++
++    // 2xAA use the same layout as 4xAA
++    if (numSamples == 2)
++    {
++        numSamples = 4;
++    }
++
++    //
++    // Compute the number of planes.
++    //
++    if (!resolved)
++    {
++        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
++        effectiveBpp  = numSamples;
++
++        ComputeSurfaceCoordFromAddrMacroTiled(addr,
++                                              bitPosition,
++                                              effectiveBpp,
++                                              pitch,
++                                              height,
++                                              effectiveSamples,
++                                              tileMode,
++                                              0, // No tileBase
++                                              0, // No compBits
++                                              ADDR_NON_DISPLAYABLE,
++                                              ignoreSE,
++                                              FALSE,
++                                              pipeSwizzle,
++                                              bankSwizzle,
++                                              pTileInfo,
++                                              pX,
++                                              pY,
++                                              pSlice,
++                                              pPlane);
++
++        if (pSample)
++        {
++            *pSample = bitPosition % numSamples;
++        }
++    }
++    else
++    {
++        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
++        effectiveSamples = 1;
++
++        ComputeSurfaceCoordFromAddrMacroTiled(addr,
++                                              bitPosition,
++                                              effectiveBpp,
++                                              pitch,
++                                              height,
++                                              effectiveSamples,
++                                              tileMode,
++                                              0, // No tileBase
++                                              0, // No compBits
++                                              ADDR_NON_DISPLAYABLE,
++                                              ignoreSE,
++                                              TRUE,
++                                              pipeSwizzle,
++                                              bankSwizzle,
++                                              pTileInfo,
++                                              pX,
++                                              pY,
++                                              pSlice,
++                                              pSample);
++    }
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::DispatchComputeFmaskCoordFromAddr
++*
++*   @brief
++*       Compute (x,y,slice,sample,plane) coordinates from an fmask address by
++*       dispatching on pIn->tileMode to the micro-tiled or macro-tiled routine
++*   @return
++*       N/A (results are written to the fields of pOut)
++*
++***************************************************************************************************
++*/
++VOID EgBasedAddrLib::DispatchComputeFmaskCoordFromAddr(
++    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
++    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
++    ) const
++{
++    UINT_64             addr              = pIn->addr;
++    UINT_32             bitPosition       = pIn->bitPosition;
++    UINT_32             pitch             = pIn->pitch;
++    UINT_32             height            = pIn->height;
++    UINT_32             numSamples        = pIn->numSamples;
++    AddrTileMode        tileMode          = pIn->tileMode;
++    BOOL_32             ignoreSE          = pIn->ignoreSE;
++    ADDR_TILEINFO*      pTileInfo         = pIn->pTileInfo;
++    BOOL_32             resolved          = pIn->resolved;
++
++    UINT_32*            pX      = &pOut->x;         // the callees write straight into pOut
++    UINT_32*            pY      = &pOut->y;
++    UINT_32*            pSlice  = &pOut->slice;
++    UINT_32*            pSample = &pOut->sample;
++    UINT_32*            pPlane  = &pOut->plane;
++
++    switch (tileMode)
++    {
++        case ADDR_TM_1D_TILED_THIN1: // micro-tiled path: takes no swizzle or tile info
++            ComputeFmaskCoordFromAddrMicroTiled(addr,
++                                                bitPosition,
++                                                pitch,
++                                                height,
++                                                numSamples,
++                                                tileMode,
++                                                resolved,
++                                                pX,
++                                                pY,
++                                                pSlice,
++                                                pSample,
++                                                pPlane);
++            break;
++        case ADDR_TM_2D_TILED_THIN1://fall through
++        case ADDR_TM_3D_TILED_THIN1: // macro-tiled path: needs bank/pipe swizzle and tile info
++            UINT_32 pipeSwizzle;
++            UINT_32 bankSwizzle;
++
++            if (m_configFlags.useCombinedSwizzle) // swizzles come packed in pIn->tileSwizzle
++            {
++                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
++                                       &bankSwizzle, &pipeSwizzle);
++            }
++            else // swizzles are supplied as two separate input fields
++            {
++                pipeSwizzle = pIn->pipeSwizzle;
++                bankSwizzle = pIn->bankSwizzle;
++            }
++
++            ComputeFmaskCoordFromAddrMacroTiled(addr,
++                                                bitPosition,
++                                                pitch,
++                                                height,
++                                                numSamples,
++                                                tileMode,
++                                                pipeSwizzle,
++                                                bankSwizzle,
++                                                ignoreSE,
++                                                pTileInfo,
++                                                resolved,
++                                                pX,
++                                                pY,
++                                                pSlice,
++                                                pSample,
++                                                pPlane);
++            break;
++        default: // any other tile mode is not handled; pOut is left untouched
++            ADDR_ASSERT_ALWAYS();
++            break;
++
++    }
++}
++#endif
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeFmaskNumPlanesFromNumSamples
++*
++*   @brief
++*       Map a sample count of 2, 4 or 8 to the number of fmask planes
++*
++*   @return
++*       Number of planes; 0 (after ADDR_UNHANDLED_CASE) for any other sample count
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputeFmaskNumPlanesFromNumSamples(
++    UINT_32 numSamples)     ///< [in] number of samples
++{
++    UINT_32 planeCount;
++    //
++    // FMASK is stored such that each micro tile is composed of elements containing N bits, where
++    // N is the number of samples.  There is a micro tile for each bit in the FMASK address, and
++    // micro tiles for each address bit, sometimes referred to as a plane, are stored sequentially.
++    // A 2-sample FMASK looks like a general surface with 2 bits per element (1 plane).
++    // A 4-sample FMASK looks like a general surface with 4 bits per element and 2 samples
++    // (2 planes).  An 8-sample FMASK looks like a general surface with 8 bits per element and
++    // 4 samples (4 planes).  R6xx and R7xx only stored 3 planes for 8-sample FMASK surfaces.
++    // This was changed for R8xx to simplify the logic in the CB.
++    //
++    if (numSamples == 2)
++    {
++        planeCount = 1;
++    }
++    else if (numSamples == 4)
++    {
++        planeCount = 2;
++    }
++    else if (numSamples == 8)
++    {
++        planeCount = 4;
++    }
++    else
++    {
++        ADDR_UNHANDLED_CASE();
++        planeCount = 0;
++    }
++    return planeCount;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::ComputeFmaskResolvedBppFromNumSamples
++*
++*   @brief
++*       Map a sample count of 2, 4 or 8 to the effective bpp of a resolved fmask surface
++*
++*   @return
++*       bpp (8 or 32); 0 (after ADDR_UNHANDLED_CASE) for any other sample count
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::ComputeFmaskResolvedBppFromNumSamples(
++    UINT_32 numSamples)     ///< number of samples
++{
++    UINT_32 resolvedBpp = 0;    // value returned for sample counts that are not handled
++
++    //
++    // Resolved FMASK surfaces are generated by the CB and read by the texture unit
++    // so that the texture unit can read compressed multi-sample color data.
++    // These surfaces store each index value packed per element.
++    // Each element contains at least num_samples * log2(num_samples) bits.
++    // Resolved FMASK surfaces are addressed as follows:
++    // 2-sample Addressed similarly to a color surface with 8 bits per element and 1 sample.
++    // 4-sample Addressed similarly to a color surface with 8 bits per element and 1 sample.
++    // 8-sample Addressed similarly to a color surface with 32 bits per element and 1 sample.
++    // (2 * 1 = 2 and 4 * 2 = 8 bits fit in 8 bpp, 8 * 3 = 24 bits fit in 32 bpp.)
++    //
++    switch (numSamples)
++    {
++        case 2: // fall through: 2 and 4 samples share the 8 bpp layout
++        case 4:
++            resolvedBpp = 8;
++            break;
++        case 8:
++            resolvedBpp = 32;
++            break;
++        default:
++            // resolvedBpp keeps its initial value of 0
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++
++    return resolvedBpp;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::IsTileInfoAllZero
++*
++*   @brief
++*       Return TRUE if every tile info field checked here is zero
++*   @note
++*       A NULL input is considered to be all zero
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::IsTileInfoAllZero(
++    ADDR_TILEINFO* pTileInfo)
++{
++    // A NULL pointer is treated the same as a structure whose
++    // fields are all zero.
++    if (pTileInfo == NULL)
++    {
++        return TRUE;
++    }
++
++    // Set as soon as any one of the checked fields is non-zero
++    const BOOL_32 anyFieldSet = (pTileInfo->banks            != 0)  ||
++                                (pTileInfo->bankWidth        != 0)  ||
++                                (pTileInfo->bankHeight       != 0)  ||
++                                (pTileInfo->macroAspectRatio != 0)  ||
++                                (pTileInfo->tileSplitBytes   != 0)  ||
++                                (pTileInfo->pipeConfig       != 0);
++
++    // All zero exactly when no field was found set
++    return anyFieldSet ? FALSE : TRUE;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlTileInfoEqual
++*
++*   @brief
++*       Return TRUE if banks, bankWidth, bankHeight, macroAspectRatio and tileSplitBytes match
++*   @note
++*       Only takes care of current HWL's data; pipeConfig is not part of the comparison
++***************************************************************************************************
++*/
++BOOL_32 EgBasedAddrLib::HwlTileInfoEqual(
++    const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand
++    const ADDR_TILEINFO* pRight ///<[in] Right compare operand
++    ) const
++{
++    BOOL_32 matches = TRUE;
++
++    if ((pLeft->banks            != pRight->banks)            ||
++        (pLeft->bankWidth        != pRight->bankWidth)        ||
++        (pLeft->bankHeight       != pRight->bankHeight)       ||
++        (pLeft->macroAspectRatio != pRight->macroAspectRatio) ||
++        (pLeft->tileSplitBytes   != pRight->tileSplitBytes))
++    {
++        matches = FALSE;
++    }
++
++    return matches;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlConvertTileInfoToHW
++*   @brief
++*       Convert tile info fields between real values and HW encodings (pIn->reverse picks which)
++*   @return
++*       ADDR_OK; ADDR_INVALIDPARAMS if a tile info pointer is NULL or a field value is unsupported
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlConvertTileInfoToHW(
++    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
++    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE retCode   = ADDR_OK;
++
++    ADDR_TILEINFO *pTileInfoIn  = pIn->pTileInfo;
++    ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo;
++
++    if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL))
++    {
++        if (pIn->reverse == FALSE) // forward direction: real values -> encoded values
++        {
++            switch (pTileInfoIn->banks) // 2/4/8/16 -> 0..3
++            {
++                case 2:
++                    pTileInfoOut->banks = 0;
++                    break;
++                case 4:
++                    pTileInfoOut->banks = 1;
++                    break;
++                case 8:
++                    pTileInfoOut->banks = 2;
++                    break;
++                case 16:
++                    pTileInfoOut->banks = 3;
++                    break;
++                default: // unsupported value: report an error and store encoding 0
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->banks = 0;
++                    break;
++            }
++
++            switch (pTileInfoIn->bankWidth) // 1/2/4/8 -> 0..3
++            {
++                case 1:
++                    pTileInfoOut->bankWidth = 0;
++                    break;
++                case 2:
++                    pTileInfoOut->bankWidth = 1;
++                    break;
++                case 4:
++                    pTileInfoOut->bankWidth = 2;
++                    break;
++                case 8:
++                    pTileInfoOut->bankWidth = 3;
++                    break;
++                default: // unsupported value: report an error and store encoding 0
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->bankWidth = 0;
++                    break;
++            }
++
++            switch (pTileInfoIn->bankHeight) // 1/2/4/8 -> 0..3
++            {
++                case 1:
++                    pTileInfoOut->bankHeight = 0;
++                    break;
++                case 2:
++                    pTileInfoOut->bankHeight = 1;
++                    break;
++                case 4:
++                    pTileInfoOut->bankHeight = 2;
++                    break;
++                case 8:
++                    pTileInfoOut->bankHeight = 3;
++                    break;
++                default: // unsupported value: report an error and store encoding 0
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->bankHeight = 0;
++                    break;
++            }
++
++            switch (pTileInfoIn->macroAspectRatio) // 1/2/4/8 -> 0..3
++            {
++                case 1:
++                    pTileInfoOut->macroAspectRatio = 0;
++                    break;
++                case 2:
++                    pTileInfoOut->macroAspectRatio = 1;
++                    break;
++                case 4:
++                    pTileInfoOut->macroAspectRatio = 2;
++                    break;
++                case 8:
++                    pTileInfoOut->macroAspectRatio = 3;
++                    break;
++                default: // unsupported value: report an error and store encoding 0
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->macroAspectRatio = 0;
++                    break;
++            }
++
++            switch (pTileInfoIn->tileSplitBytes) // 64/128/.../4096 -> 0..6
++            {
++                case 64:
++                    pTileInfoOut->tileSplitBytes = 0;
++                    break;
++                case 128:
++                    pTileInfoOut->tileSplitBytes = 1;
++                    break;
++                case 256:
++                    pTileInfoOut->tileSplitBytes = 2;
++                    break;
++                case 512:
++                    pTileInfoOut->tileSplitBytes = 3;
++                    break;
++                case 1024:
++                    pTileInfoOut->tileSplitBytes = 4;
++                    break;
++                case 2048:
++                    pTileInfoOut->tileSplitBytes = 5;
++                    break;
++                case 4096:
++                    pTileInfoOut->tileSplitBytes = 6;
++                    break;
++                default: // unsupported value: report an error and store encoding 0
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->tileSplitBytes = 0;
++                    break;
++            }
++        }
++        else // reverse direction: encoded values -> real values
++        {
++            switch (pTileInfoIn->banks) // 0..3 -> 2/4/8/16
++            {
++                case 0:
++                    pTileInfoOut->banks = 2;
++                    break;
++                case 1:
++                    pTileInfoOut->banks = 4;
++                    break;
++                case 2:
++                    pTileInfoOut->banks = 8;
++                    break;
++                case 3:
++                    pTileInfoOut->banks = 16;
++                    break;
++                default: // unknown encoding: report an error and store the smallest real value
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->banks = 2;
++                    break;
++            }
++
++            switch (pTileInfoIn->bankWidth) // 0..3 -> 1/2/4/8
++            {
++                case 0:
++                    pTileInfoOut->bankWidth = 1;
++                    break;
++                case 1:
++                    pTileInfoOut->bankWidth = 2;
++                    break;
++                case 2:
++                    pTileInfoOut->bankWidth = 4;
++                    break;
++                case 3:
++                    pTileInfoOut->bankWidth = 8;
++                    break;
++                default: // unknown encoding: report an error and store the smallest real value
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->bankWidth = 1;
++                    break;
++            }
++
++            switch (pTileInfoIn->bankHeight) // 0..3 -> 1/2/4/8
++            {
++                case 0:
++                    pTileInfoOut->bankHeight = 1;
++                    break;
++                case 1:
++                    pTileInfoOut->bankHeight = 2;
++                    break;
++                case 2:
++                    pTileInfoOut->bankHeight = 4;
++                    break;
++                case 3:
++                    pTileInfoOut->bankHeight = 8;
++                    break;
++                default: // unknown encoding: report an error and store the smallest real value
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->bankHeight = 1;
++                    break;
++            }
++
++            switch (pTileInfoIn->macroAspectRatio) // 0..3 -> 1/2/4/8
++            {
++                case 0:
++                    pTileInfoOut->macroAspectRatio = 1;
++                    break;
++                case 1:
++                    pTileInfoOut->macroAspectRatio = 2;
++                    break;
++                case 2:
++                    pTileInfoOut->macroAspectRatio = 4;
++                    break;
++                case 3:
++                    pTileInfoOut->macroAspectRatio = 8;
++                    break;
++                default: // unknown encoding: report an error and store the smallest real value
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->macroAspectRatio = 1;
++                    break;
++            }
++
++            switch (pTileInfoIn->tileSplitBytes) // 0..6 -> 64/128/.../4096
++            {
++                case 0:
++                    pTileInfoOut->tileSplitBytes = 64;
++                    break;
++                case 1:
++                    pTileInfoOut->tileSplitBytes = 128;
++                    break;
++                case 2:
++                    pTileInfoOut->tileSplitBytes = 256;
++                    break;
++                case 3:
++                    pTileInfoOut->tileSplitBytes = 512;
++                    break;
++                case 4:
++                    pTileInfoOut->tileSplitBytes = 1024;
++                    break;
++                case 5:
++                    pTileInfoOut->tileSplitBytes = 2048;
++                    break;
++                case 6:
++                    pTileInfoOut->tileSplitBytes = 4096;
++                    break;
++                default: // unknown encoding: report an error and store the smallest real value
++                    ADDR_ASSERT_ALWAYS();
++                    retCode = ADDR_INVALIDPARAMS;
++                    pTileInfoOut->tileSplitBytes = 64;
++                    break;
++            }
++        }
++
++        if (pTileInfoIn != pTileInfoOut) // pipeConfig is passed through unconverted
++        {
++            pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig;
++        }
++    }
++    else // at least one tile info pointer is NULL; nothing is written
++    {
++        ADDR_ASSERT_ALWAYS();
++        retCode = ADDR_INVALIDPARAMS;
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeSurfaceInfo
++*   @brief
++*       Validate the sample/fragment counts, run DispatchComputeSurfaceInfo, fill tile indices
++*   @return
++*       ADDR_OK; ADDR_INVALIDPARAMS if numSamples < numFrags or the dispatch returns FALSE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceInfo(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE retCode = ADDR_OK;
++
++    if (pIn->numSamples < pIn->numFrags) // fewer samples than fragments is rejected
++    {
++        retCode = ADDR_INVALIDPARAMS;
++    }
++
++    ADDR_TILEINFO tileInfo = {0}; // local scratch used when the caller gives no pOut->pTileInfo
++
++    if (retCode == ADDR_OK)
++    {
++        // Uses internal tile info if pOut does not have a valid pTileInfo
++        if (pOut->pTileInfo == NULL)
++        {
++            pOut->pTileInfo = &tileInfo;
++        }
++
++        if (!DispatchComputeSurfaceInfo(pIn, pOut))
++        {
++            retCode = ADDR_INVALIDPARAMS; // note: the steps below still run after a failure
++        }
++
++        // Returns an index
++        pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo,
++                                                pOut->tileMode,
++                                                pOut->tileType,
++                                                pOut->tileIndex);
++
++        if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid))
++        {
++            pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex,
++                                                            pIn->flags,
++                                                            pIn->bpp,
++                                                            pIn->numSamples,
++                                                            pOut->pTileInfo);
++        }
++
++        // Resets pTileInfo to NULL if the internal tile info is used
++        if (pOut->pTileInfo == &tileInfo)
++        {
++#if DEBUG
++            // Client does not pass in a valid pTileInfo
++            if (IsMacroTiled(pOut->tileMode))
++            {
++                // If a valid index is returned, then no pTileInfo is okay
++                ADDR_ASSERT(!m_configFlags.useTileIndex || pOut->tileIndex != TileIndexInvalid);
++
++                if (!IsTileInfoAllZero(pIn->pTileInfo))
++                {
++                    // The initial value of pIn->pTileInfo is copied to tileInfo
++                    // We do not expect any of these values to change, nor any input to be 0
++                    ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks);
++                    ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth);
++                    ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight);
++                    ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio);
++                    ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes);
++                }
++            }
++#endif
++            pOut->pTileInfo = NULL; // never hand the address of the local back to the caller
++        }
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeSurfaceAddrFromCoord
++*   @brief
++*       Validate the coordinate/sample inputs, then store the dispatched address in pOut->addr
++*   @return
++*       ADDR_OK, or ADDR_INVALIDPARAMS if x > pitch, y > height or numSamples > m_maxSamples
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceAddrFromCoord(
++    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
++    ) const
++{
++    BOOL_32 paramsValid = (pIn->numSamples <= m_maxSamples);
++
++#if !ALT_TEST // Overflow test needs this out-of-boundary coord
++    if ((pIn->x > pIn->pitch) || (pIn->y > pIn->height))
++    {
++        paramsValid = FALSE;
++    }
++#endif
++
++    if (!paramsValid)
++    {
++        return ADDR_INVALIDPARAMS;
++    }
++
++    pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut);
++
++    return ADDR_OK;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeSurfaceCoordFromAddr
++*   @brief
++*       Validate bitPosition/numSamples, then dispatch the address-to-coordinate computation
++*   @return
++*       ADDR_OK, or ADDR_INVALIDPARAMS if bitPosition >= 8 or numSamples > m_maxSamples
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceCoordFromAddr(
++    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
++    ) const
++{
++    // Accepted inputs: bitPosition in 0..7 and at most m_maxSamples samples
++    const BOOL_32 badBitPosition = (pIn->bitPosition >= 8);
++    const BOOL_32 tooManySamples = (pIn->numSamples > m_maxSamples);
++
++    if (badBitPosition || tooManySamples)
++    {
++        return ADDR_INVALIDPARAMS;
++    }
++
++    DispatchComputeSurfaceCoordFromAddr(pIn, pOut);
++
++    return ADDR_OK;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeSliceTileSwizzle
++*   @brief
++*       Entry of EgBasedAddrLib ComputeSliceTileSwizzle
++*   @return
++*       ADDR_OK, or ADDR_INVALIDPARAMS if pIn->pTileInfo is NULL or its banks field is 0
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSliceTileSwizzle(
++    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    // The computation is only attempted when a tile info is supplied
++    // and that tile info reports a positive bank count.
++    const BOOL_32 hasBanks = (pIn->pTileInfo != NULL) && (pIn->pTileInfo->banks > 0);
++
++    if (!hasBanks)
++    {
++        return ADDR_INVALIDPARAMS;
++    }
++
++    // Inputs are forwarded unchanged; the result lands in pOut->tileSwizzle
++    pOut->tileSwizzle = ComputeSliceTileSwizzle(pIn->tileMode,
++                                                pIn->baseSwizzle,
++                                                pIn->slice,
++                                                pIn->baseAddr,
++                                                pIn->pTileInfo);
++
++    return ADDR_OK;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeHtileBpp
++*
++*   @brief
++*       Return the htile bpp; only the 8x8 block configuration is supported
++*
++*   @return
++*       Htile bpp (always 32)
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::HwlComputeHtileBpp(
++    BOOL_32 isWidth8,   ///< [in] TRUE if block width is 8
++    BOOL_32 isHeight8   ///< [in] TRUE if block height is 8
++    ) const
++{
++    const UINT_32 htileBpp = 32;            // the same value is returned for every input
++    ADDR_ASSERT(isWidth8 && isHeight8);     // only support 8x8 mode
++    return htileBpp;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlComputeHtileBaseAlign
++*
++*   @brief
++*       Compute htile base alignment: pipe interleave bytes * pipes (* banks if TC compatible)
++*
++*   @return
++*       Htile base alignment
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::HwlComputeHtileBaseAlign(
++    BOOL_32         isTcCompatible, ///< [in] if TC compatible
++    BOOL_32         isLinear,       ///< [in] if it is linear mode (not used by this function)
++    ADDR_TILEINFO*  pTileInfo       ///< [in] Tile info
++    ) const
++{
++    const UINT_32 pipeAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);
++
++    if (!isTcCompatible)
++    {
++        return pipeAlign;
++    }
++
++    // TC compatible: additionally scale by the bank count. A missing tile info
++    // trips the assert and falls back to the pipe-only alignment.
++    ADDR_ASSERT(pTileInfo != NULL);
++
++    return (pTileInfo != NULL) ? (pipeAlign * pTileInfo->banks) : pipeAlign;
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled
++*
++*   @brief
++*       Compute 1D tiled surface pitch alignment from the pipe interleave size, bpp,
++*       sample count and micro tile thickness.
++*
++*   @return
++*       pitch alignment (never smaller than MicroTileWidth)
++***************************************************************************************************
++*/
++UINT_32 EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled(
++    AddrTileMode        tileMode,          ///< [in] tile mode
++    UINT_32             bpp,               ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
++    UINT_32             numSamples         ///< [in] number of samples
++    ) const
++{
++    // Thickness for this tile mode, as reported by ComputeSurfaceThickness
++    const UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++
++    //
++    // Special workaround for depth/stencil buffer, use 8 bpp to meet larger requirement for
++    // stencil buffer since pitch alignment is related to bpp.
++    // For a depth only buffer do not set this.
++    //
++    // Note: this actually does not work for mipmap but mipmap depth texture is not really
++    // sampled with mipmap.
++    //
++    if (flags.depth && !flags.noStencil)
++    {
++        bpp = 8;
++    }
++
++    // Pixels in one micro tile, thickness included
++    const UINT_32 tilePixelCount = MicroTilePixels * thickness;
++
++    // Pixels that fit into one pipe interleave at this bpp and sample count
++    const UINT_32 interleavePixels = BYTES_TO_BITS(m_pipeInterleaveBytes) / (bpp * numSamples);
++
++    // Whole micro tiles per pipe interleave (integer division)
++    const UINT_32 interleaveTiles = interleavePixels / tilePixelCount;
++
++    // Align to the width of those micro tiles, but never to less than
++    // a single micro tile width
++    return Max(MicroTileWidth, interleaveTiles * MicroTileWidth);
++}
++
++/**
++***************************************************************************************************
++*   EgBasedAddrLib::HwlGetSizeAdjustmentMicroTiled
++*
++*   @brief
++*       Compute the 1D tiled slice size; this implementation leaves *pPitch / *pHeight unchanged
++*
++*   @return
++*       Logical slice size in bytes
++***************************************************************************************************
++*/
++UINT_64 EgBasedAddrLib::HwlGetSizeAdjustmentMicroTiled(
++    UINT_32             thickness,      ///< [in] thickness
++    UINT_32             bpp,            ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,          ///< [in] surface flags (not used here)
++    UINT_32             numSamples,     ///< [in] number of samples
++    UINT_32             baseAlign,      ///< [in] base alignment (only checked by the assert)
++    UINT_32             pitchAlign,     ///< [in] pitch alignment (not used here)
++    UINT_32*            pPitch,         ///< [in/out] pointer to pitch (only read here)
++    UINT_32*            pHeight         ///< [in/out] pointer to height (only read here)
++    ) const
++{
++    UINT_64 logicalSliceSize;
++    UINT_64 physicalSliceSize;
++
++    UINT_32 pitch   = *pPitch;
++    UINT_32 height  = *pHeight;
++
++    // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1)
++    logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
++
++    // Physical slice: multiplied by thickness
++    physicalSliceSize =  logicalSliceSize * thickness;
++
++    //
++    // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes
++    //
++    ADDR_ASSERT((physicalSliceSize % baseAlign) == 0);
++
++    return logicalSliceSize;
++}
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.h b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.h
+new file mode 100644
+index 0000000..84adb66
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/egbaddrlib.h
+@@ -0,0 +1,411 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  egbaddrlib.h
++* @brief Contains the EgBasedAddrLib class definition.
++***************************************************************************************************
++*/
++
++#ifndef __EG_BASED_ADDR_LIB_H__
++#define __EG_BASED_ADDR_LIB_H__
++
++#include "addrlib.h"
++
++
++/// Bit-field structure taken as the pOutput argument of ComputeSurfaceCoord2DFromBankPipe
++struct CoordFromBankPipe
++{
++    UINT_32 xBits : 3;  ///< 3-bit wide x field
++    UINT_32 yBits : 4;  ///< 4-bit wide y field
++
++    UINT_32 xBit3 : 1;  ///< Single-bit fields from here on; presumably the individual
++    UINT_32 xBit4 : 1;  ///< x coordinate bits 3..5 and y coordinate bits 3..6
++    UINT_32 xBit5 : 1;  ///< - TODO confirm against the implementation
++    UINT_32 yBit3 : 1;
++    UINT_32 yBit4 : 1;
++    UINT_32 yBit5 : 1;
++    UINT_32 yBit6 : 1;
++};
++
++/**
++***************************************************************************************************
++* @brief This class is the Evergreen based address library
++* @note  Abstract class
++***************************************************************************************************
++*/
++class EgBasedAddrLib : public AddrLib
++{
++protected:
++    EgBasedAddrLib(const AddrClient* pClient);
++    virtual ~EgBasedAddrLib();
++
++public:
++
++    /// Surface info functions
++
++    // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output.
++    //       On input:
++    //       One or more fields may be 0 to be calculated/defaulted - pre-SI h/w.
++    //       H/W using tile mode index only accepts none or all 0's - SI and newer h/w.
++    //       It then returns the actual tiling configuration used.
++    //       Other methods' TileInfo must be valid on entry
++    BOOL_32 DispatchComputeSurfaceInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    ADDR_E_RETURNCODE DispatchComputeFmaskInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
++
++protected:
++    // Hwl interface
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
++        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
++        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
++        const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
++        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
++        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
++        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
++        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO*  pTileInfo,
++        UINT_64 baseAddr, UINT_32* pTileSwizzle) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
++        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
++        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
++        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
++        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
++
++    virtual UINT_32 HwlComputeHtileBpp(
++        BOOL_32 isWidth8, BOOL_32 isHeight8) const;
++
++    virtual UINT_32 HwlComputeHtileBaseAlign(
++        BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
++
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
++        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
++        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
++
++    virtual BOOL_32 HwlDegradeBaseLevel(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
++
++    virtual UINT_32 HwlComputeQbStereoRightSwizzle(
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const;
++
++    virtual VOID HwlComputePixelCoordFromOffset(
++        UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
++        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
++        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
++
++    /// Return Cmask block max. NOTE(review): declared BOOL_32 but returns a count - confirm
++    virtual BOOL_32 HwlGetMaxCmaskBlockMax() const
++    {
++        return 16383; // 0x3FFF, the largest value that fits in 14 bits
++    }
++
++    // Sub-hwl interface
++    /// Pure virtual function to setup tile info (indices) if client requests to do so
++    virtual VOID HwlSetupTileInfo(
++        AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
++        AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
++
++    /// Pure virtual function to get pitch alignment for linear modes
++    virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0;
++
++    /// Pure virtual function to get size adjustment for linear modes
++    virtual UINT_64 HwlGetSizeAdjustmentLinear(
++        AddrTileMode tileMode,
++        UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
++        UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0;
++
++    virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
++
++    virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
++        UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
++        UINT_32 baseAlign, UINT_32 pitchAlign,
++        UINT_32 *pPitch, UINT_32 *pHeight) const;
++
++    /// Pure virtual function to do extra sanity check on macro tiled tile info
++    virtual BOOL_32 HwlSanityCheckMacroTiled(
++        ADDR_TILEINFO* pTileInfo) const = 0;
++
++    /// Pure virtual function to check current level to be the last macro tiled one
++    virtual VOID HwlCheckLastMacroTiledLvl(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
++
++    /// Pure virtual function to adjust bank before bank is modified by rotation
++    virtual UINT_32 HwlPreAdjustBank(
++        UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO*  pTileInfo) const = 0;
++
++    virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
++        AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
++        UINT_32 bank, UINT_32 pipe,
++        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
++        BOOL_32 ignoreSE,
++        ADDR_TILEINFO* pTileInfo) const = 0;
++
++    virtual BOOL_32 HwlTileInfoEqual(
++        const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
++
++    virtual AddrTileMode HwlDegradeThickTileMode(
++        AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
++
++    virtual INT_32 HwlPostCheckTileIndex(
++        const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
++        INT curIndex = TileIndexInvalid) const
++    {
++        return TileIndexInvalid; // base implementation always reports an invalid index
++    }
++
++    virtual VOID HwlFmaskPreThunkSurfInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
++        const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const
++    {   // base implementation does nothing
++    }
++
++    virtual VOID HwlFmaskPostThunkSurfInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const
++    {   // base implementation does nothing
++    }
++
++    /// Virtual function to check if the height needs extra padding
++    /// for stereo right eye offset, to avoid bank pipe swizzle
++    virtual BOOL_32 HwlStereoCheckRightOffsetPadding() const
++    {
++        return FALSE; // base implementation: no extra padding
++    }
++
++    virtual BOOL_32 HwlReduceBankWidthHeight(
++        UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
++        UINT_32 bankHeightAlign, UINT_32 pipes,
++        ADDR_TILEINFO* pTileInfo) const;
++
++    // Protected non-virtual functions
++
++    /// Mip level functions
++    AddrTileMode ComputeSurfaceMipLevelTileMode(
++        AddrTileMode baseTileMode, UINT_32 bpp,
++        UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples,
++        UINT_32 pitchAlign, UINT_32 heightAlign,
++        ADDR_TILEINFO* pTileInfo) const;
++
++    /// Swizzle functions
++    VOID ExtractBankPipeSwizzle(
++        UINT_32 base256b, ADDR_TILEINFO* pTileInfo,
++        UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const;
++
++    UINT_32 GetBankPipeSwizzle(
++        UINT_32 bankSwizzle, UINT_32 pipeSwizzle,
++        UINT_64 baseAddr, ADDR_TILEINFO*  pTileInfo) const;
++
++    UINT_32 ComputeSliceTileSwizzle(
++        AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr,
++        ADDR_TILEINFO* pTileInfo) const;
++
++    /// Addressing functions
++    UINT_32 ComputeBankFromCoord(
++        UINT_32 x, UINT_32 y, UINT_32 slice,
++        AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice,
++        ADDR_TILEINFO* pTileInfo) const;
++
++    UINT_32 ComputeBankFromAddr(
++        UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const;
++
++    UINT_32 ComputePipeRotation(
++        AddrTileMode tileMode, UINT_32 numPipes) const;
++
++    UINT_32 ComputeBankRotation(
++        AddrTileMode tileMode, UINT_32 numBanks,
++        UINT_32 numPipes) const;
++
++    VOID ComputeSurfaceCoord2DFromBankPipe(
++        AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice,
++        UINT_32 bank, UINT_32 pipe,
++        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
++        ADDR_TILEINFO* pTileInfo,
++        CoordFromBankPipe *pOutput) const;
++
++    /// Htile/Cmask functions
++    UINT_64 ComputeHtileBytes(
++        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
++        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* sliceBytes, UINT_32 baseAlign) const;
++
++    // Static functions
++    static BOOL_32 IsTileInfoAllZero(ADDR_TILEINFO* pTileInfo);
++    static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples);
++    static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples);
++
++private:
++
++    BOOL_32 ComputeSurfaceInfoLinear(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
++        UINT_32 padDims) const;
++
++    BOOL_32 ComputeSurfaceInfoMicroTiled(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
++        UINT_32 padDims,
++        AddrTileMode expTileMode) const;
++
++    BOOL_32 ComputeSurfaceInfoMacroTiled(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
++        UINT_32 padDims,
++        AddrTileMode expTileMode) const;
++
++    BOOL_32 ComputeSurfaceAlignmentsLinear(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
++        UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
++
++    BOOL_32 ComputeSurfaceAlignmentsMicroTiled(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
++        UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
++
++    BOOL_32 ComputeSurfaceAlignmentsMacroTiled(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
++        UINT_32 mipLevel, UINT_32 numSamples,
++        ADDR_TILEINFO* pTileInfo,
++        UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
++
++    /// Surface addressing functions
++    UINT_64 DispatchComputeSurfaceAddrFromCoord(
++        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    VOID    DispatchComputeSurfaceCoordFromAddr(
++        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
++
++    UINT_64 ComputeSurfaceAddrFromCoordMicroTiled(
++        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        AddrTileMode tileMode,
++        AddrTileType microTileType, BOOL_32 isDepthSampleOrder,
++        UINT_32* pBitPosition) const;
++
++    UINT_64 ComputeSurfaceAddrFromCoordMacroTiled(
++        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        AddrTileMode tileMode,
++        AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
++        UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
++        ADDR_TILEINFO* pTileInfo,
++        UINT_32* pBitPosition) const;
++
++    VOID    ComputeSurfaceCoordFromAddrMacroTiled(
++        UINT_64 addr, UINT_32 bitPosition,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
++        AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
++        UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
++        ADDR_TILEINFO* pTileInfo,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
++
++    /// Fmask functions
++    UINT_64 DispatchComputeFmaskAddrFromCoord(
++        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
++
++    VOID    DispatchComputeFmaskCoordFromAddr(
++        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
++        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
++
++    // FMASK related methods - private
++    UINT_64 ComputeFmaskAddrFromCoordMicroTiled(
++        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
++        UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
++        BOOL_32 resolved, UINT_32* pBitPosition) const;
++
++    VOID    ComputeFmaskCoordFromAddrMicroTiled(
++        UINT_64 addr, UINT_32 bitPosition,
++        UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        AddrTileMode tileMode, BOOL_32 resolved,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
++
++    VOID    ComputeFmaskCoordFromAddrMacroTiled(
++        UINT_64 addr, UINT_32 bitPosition,
++        UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
++        UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
++        BOOL_32 ignoreSE,
++        ADDR_TILEINFO* pTileInfo,
++        BOOL_32 resolved,
++        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
++
++    UINT_64 ComputeFmaskAddrFromCoordMacroTiled(
++        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
++        UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
++        BOOL_32 ignoreSE,
++        ADDR_TILEINFO* pTileInfo,
++        BOOL_32 resolved,
++        UINT_32* pBitPosition) const;
++
++    /// Sanity check functions
++    BOOL_32 SanityCheckMacroTiled(
++        ADDR_TILEINFO* pTileInfo) const;
++
++protected:
++    UINT_32 m_ranks;                ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK
++    UINT_32 m_logicalBanks;         ///< Logical banks = m_banks * m_ranks if m_banks != 16
++    UINT_32 m_bankInterleave;       ///< Bank interleave, as a multiple of pipe interleave size
++};
++
++#endif
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.cpp b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.cpp
+new file mode 100644
+index 0000000..a858b55
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.cpp
+@@ -0,0 +1,2818 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  siaddrlib.cpp
++* @brief Contains the implementation for the SIAddrLib class.
++***************************************************************************************************
++*/
++
++#include "siaddrlib.h"
++
++#include "si_gb_reg.h"
++
++#include "si_ci_vi_merged_enum.h"
++
++#if BRAHMA_BUILD
++#include "amdgpu_id.h"
++#else
++#include "si_id.h"
++#endif
++
++///////////////////////////////////////////////////////////////////////////////////////////////////
++///////////////////////////////////////////////////////////////////////////////////////////////////
++
++/**
++***************************************************************************************************
++*   AddrSIHwlInit
++*   @brief
++*       Hardware-layer entry point for SI: forwards the client to SIAddrLib::CreateObj.
++*   @return
++*       The pointer handed back by SIAddrLib::CreateObj, as an AddrLib pointer.
++***************************************************************************************************
++*/
++AddrLib* AddrSIHwlInit(const AddrClient* pClient)
++{
++    AddrLib* const pSiLib = SIAddrLib::CreateObj(pClient);
++
++    return pSiLib;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::SIAddrLib
++*
++*   @brief
++*       Constructor: chains to EgBasedAddrLib, starts with m_noOfEntries at 0, zero-fills
++*       m_settings and sets m_class to SI_ADDRLIB.
++***************************************************************************************************
++*/
++SIAddrLib::SIAddrLib(const AddrClient* pClient)
++    : EgBasedAddrLib(pClient)
++    , m_noOfEntries(0)
++{
++    memset(&m_settings, 0, sizeof(m_settings));
++    m_class = SI_ADDRLIB;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::~SIAddrLib
++*
++*   @brief
++*       Destructor; the body is empty, so nothing SI-specific is released here
++***************************************************************************************************
++*/
++SIAddrLib::~SIAddrLib()
++{
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlGetPipes
++*
++*   @brief
++*       Get the number of pipes for a surface from the pipe config in its tile info
++*   @return
++*       Pipe count from GetPipePerSurf; m_pipes (after asserting) when pTileInfo is null
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlGetPipes(
++    const ADDR_TILEINFO* pTileInfo    ///< [in] Tile info
++    ) const
++{
++    if (!pTileInfo)
++    {
++        // No per-surface tile info was supplied: raise the assert, then fall back to
++        // the global pipe count kept in m_pipes.
++        ADDR_ASSERT_ALWAYS();
++
++        return m_pipes;
++    }
++
++    // The per-surface pipe count is derived from the tile info's pipe config.
++    const UINT_32 pipesForSurface = GetPipePerSurf(pTileInfo->pipeConfig);
++
++    return pipesForSurface;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::GetPipePerSurf
++*   @brief
++*       Translate a tile info pipe config value into the number of pipes it stands for
++*   @return
++*       2, 4, 8 or 16 for the listed configs; m_pipes (after asserting) for any other value
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::GetPipePerSurf(
++    AddrPipeCfg pipeConfig   ///< [in] pipe config
++    ) const
++{
++    switch (pipeConfig)
++    {
++        case ADDR_PIPECFG_P2:
++            return 2;
++
++        // Four-pipe configs
++        case ADDR_PIPECFG_P4_8x16:
++        case ADDR_PIPECFG_P4_16x16:
++        case ADDR_PIPECFG_P4_16x32:
++        case ADDR_PIPECFG_P4_32x32:
++            return 4;
++
++        // Eight-pipe configs
++        case ADDR_PIPECFG_P8_16x16_8x16:
++        case ADDR_PIPECFG_P8_16x32_8x16:
++        case ADDR_PIPECFG_P8_32x32_8x16:
++        case ADDR_PIPECFG_P8_16x32_16x16:
++        case ADDR_PIPECFG_P8_32x32_16x16:
++        case ADDR_PIPECFG_P8_32x32_16x32:
++        case ADDR_PIPECFG_P8_32x64_32x32:
++            return 8;
++
++        // Sixteen-pipe configs
++        case ADDR_PIPECFG_P16_32x32_8x16:
++        case ADDR_PIPECFG_P16_32x32_16x16:
++            return 16;
++
++        default:
++            ADDR_ASSERT(!"Invalid pipe config");
++            return m_pipes; // unknown config: fall back to m_pipes
++    }
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::ComputePipeFromCoord
++*
++*   @brief
++*       Compute the pipe index for a pixel from its x/y micro tile bits, slice and pipe swizzle
++*   @return
++*       Pipe index after the per-slice rotation and the pipe swizzle have been XORed in
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::ComputePipeFromCoord(
++    UINT_32         x,              ///< [in] x coordinate
++    UINT_32         y,              ///< [in] y coordinate
++    UINT_32         slice,          ///< [in] slice index
++    AddrTileMode    tileMode,       ///< [in] tile mode
++    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
++    BOOL_32         ignoreSE,       ///< [in] TRUE if shader engines are ignored
++    ADDR_TILEINFO*  pTileInfo       ///< [in] Tile info
++    ) const
++{
++    // Coordinates in units of micro tiles; the xN/yN names below suggest a micro tile
++    // dimension of 8 (bit 0 of tileX being bit 3 of x) - TODO confirm.
++    const UINT_32 tileX = x / MicroTileWidth;
++    const UINT_32 tileY = y / MicroTileHeight;
++    const UINT_32 x3 = _BIT(tileX, 0);
++    const UINT_32 x4 = _BIT(tileX, 1);
++    const UINT_32 x5 = _BIT(tileX, 2);
++    const UINT_32 x6 = _BIT(tileX, 3);
++    const UINT_32 y3 = _BIT(tileY, 0);
++    const UINT_32 y4 = _BIT(tileY, 1);
++    const UINT_32 y5 = _BIT(tileY, 2);
++    const UINT_32 y6 = _BIT(tileY, 3);
++
++    // Pipe index bits and pipe count; a config not listed below leaves them all zero.
++    UINT_32 pipeBit0 = 0;
++    UINT_32 pipeBit1 = 0;
++    UINT_32 pipeBit2 = 0;
++    UINT_32 pipeBit3 = 0;
++    UINT_32 numPipes = 0;
++
++    switch (pTileInfo->pipeConfig)
++    {
++        case ADDR_PIPECFG_P2:
++            pipeBit0 = x3 ^ y3;
++            numPipes = 2;
++            break;
++        case ADDR_PIPECFG_P4_8x16:
++            pipeBit0 = x4 ^ y3;
++            pipeBit1 = x3 ^ y4;
++            numPipes = 4;
++            break;
++        case ADDR_PIPECFG_P4_16x16:
++            pipeBit0 = x3 ^ y3 ^ x4;
++            pipeBit1 = x4 ^ y4;
++            numPipes = 4;
++            break;
++        case ADDR_PIPECFG_P4_16x32:
++            pipeBit0 = x3 ^ y3 ^ x4;
++            pipeBit1 = x4 ^ y5;
++            numPipes = 4;
++            break;
++        case ADDR_PIPECFG_P4_32x32:
++            pipeBit0 = x3 ^ y3 ^ x5;
++            pipeBit1 = x5 ^ y5;
++            numPipes = 4;
++            break;
++        // NOTE(review): unlike the other P8 configs this one sets no pipeBit2, while
++        // ComputeTileCoordFromPipeAndElemIdx reads pipebit2 for it - confirm intent.
++        case ADDR_PIPECFG_P8_16x16_8x16:
++            pipeBit0 = x4 ^ y3 ^ x5;
++            pipeBit1 = x3 ^ y5;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P8_16x32_8x16:
++            pipeBit0 = x4 ^ y3 ^ x5;
++            pipeBit1 = x3 ^ y4;
++            pipeBit2 = x4 ^ y5;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P8_16x32_16x16:
++            pipeBit0 = x3 ^ y3 ^ x4;
++            pipeBit1 = x5 ^ y4;
++            pipeBit2 = x4 ^ y5;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P8_32x32_8x16:
++            pipeBit0 = x4 ^ y3 ^ x5;
++            pipeBit1 = x3 ^ y4;
++            pipeBit2 = x5 ^ y5;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x16:
++            pipeBit0 = x3 ^ y3 ^ x4;
++            pipeBit1 = x4 ^ y4;
++            pipeBit2 = x5 ^ y5;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x32:
++            pipeBit0 = x3 ^ y3 ^ x4;
++            pipeBit1 = x4 ^ y6;
++            pipeBit2 = x5 ^ y5;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P8_32x64_32x32:
++            pipeBit0 = x3 ^ y3 ^ x5;
++            pipeBit1 = x6 ^ y5;
++            pipeBit2 = x5 ^ y6;
++            numPipes = 8;
++            break;
++        case ADDR_PIPECFG_P16_32x32_8x16:
++            pipeBit0 = x4 ^ y3;
++            pipeBit1 = x3 ^ y4;
++            pipeBit2 = x5 ^ y6;
++            pipeBit3 = x6 ^ y5;
++            numPipes = 16;
++            break;
++        case ADDR_PIPECFG_P16_32x32_16x16:
++            pipeBit0 = x3 ^ y3 ^ x4;
++            pipeBit1 = x4 ^ y4;
++            pipeBit2 = x5 ^ y6;
++            pipeBit3 = x6 ^ y5;
++            numPipes = 16;
++            break;
++        default:
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++    const UINT_32 pipeIdx = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3);
++
++    const UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
++
++    // Apply pipe rotation for the slice: only the 3D tiled modes rotate, every other
++    // tile mode keeps a rotation of 0.
++    const BOOL_32 is3dTiled = (tileMode == ADDR_TM_3D_TILED_THIN1) ||
++                              (tileMode == ADDR_TM_3D_TILED_THICK) ||
++                              (tileMode == ADDR_TM_3D_TILED_XTHICK);
++    UINT_32 sliceRotation = 0;
++
++    if (is3dTiled)
++    {
++        sliceRotation =
++            Max(1, static_cast<INT_32>(numPipes / 2) - 1) * (slice / microTileThickness);
++    }
++
++    // Add the rotation to the swizzle, mask the sum with (numPipes - 1) and XOR the
++    // result into the pipe index.
++    const UINT_32 rotatedSwizzle = (pipeSwizzle + sliceRotation) & (numPipes - 1);
++
++    return pipeIdx ^ rotatedSwizzle;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::ComputeTileCoordFromPipeAndElemIdx
++*
++*   @brief
++*       Compute (x,y) of a tile within a macro tile from address
++*   @return
++*       None; the tile coordinates are returned through pX and pY
++***************************************************************************************************
++*/
++VOID SIAddrLib::ComputeTileCoordFromPipeAndElemIdx(
++    UINT_32         elemIdx,          ///< [in] per pipe element index within a macro tile
++    UINT_32         pipe,             ///< [in] pipe index
++    AddrPipeCfg     pipeCfg,          ///< [in] pipe config
++    UINT_32         pitchInMacroTile, ///< [in] surface pitch in macro tile
++    UINT_32         x,                ///< [in] x coordinate of the (0,0) tile in a macro tile
++    UINT_32         y,                ///< [in] y coordinate of the (0,0) tile in a macro tile
++    UINT_32*        pX,               ///< [out] x coordinate
++    UINT_32*        pY                ///< [out] y coordinate
++    ) const
++{
++    UINT_32 pipebit0 = _BIT(pipe,0);
++    UINT_32 pipebit1 = _BIT(pipe,1);
++    UINT_32 pipebit2 = _BIT(pipe,2);
++    UINT_32 pipebit3 = _BIT(pipe,3);
++    UINT_32 elemIdx0 = _BIT(elemIdx,0);
++    UINT_32 elemIdx1 = _BIT(elemIdx,1);
++    UINT_32 elemIdx2 = _BIT(elemIdx,2);
++    UINT_32 x3 = 0;
++    UINT_32 x4 = 0;
++    UINT_32 x5 = 0;
++    UINT_32 x6 = 0;
++    UINT_32 y3 = 0;
++    UINT_32 y4 = 0;
++    UINT_32 y5 = 0;
++    UINT_32 y6 = 0;
++
++    switch(pipeCfg)
++    {
++        case ADDR_PIPECFG_P2:
++            x4 = elemIdx2;
++            y4 = elemIdx1 ^ x4;
++            y3 = elemIdx0 ^ x4;
++            x3 = pipebit0 ^ y3;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P4_8x16:
++            x4 = elemIdx1;
++            y4 = elemIdx0 ^ x4;
++            x3 = pipebit1 ^ y4;
++            y3 = pipebit0 ^ x4;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P4_16x16:
++            x4 = elemIdx1;
++            y3 = elemIdx0 ^ x4;
++            y4 = pipebit1 ^ x4;
++            x3 = pipebit0 ^ y3 ^ x4;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P4_16x32:
++            x3 = elemIdx0 ^ pipebit0;
++            y5 = _BIT(y,5);
++            x4 = pipebit1 ^ y5;
++            y3 = pipebit0 ^ x3 ^ x4;
++            y4 = elemIdx1 ^ x4;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P4_32x32:
++            x4 = elemIdx2;
++            y3 = elemIdx0 ^ x4;
++            y4 = elemIdx1 ^ x4;
++            if((pitchInMacroTile % 2) == 0)
++            {   //even
++                y5 = _BIT(y,5);
++                x5 = pipebit1 ^ y5;
++                x3 = pipebit0 ^ y3 ^ x5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            else
++            {   //odd
++                x5 = _BIT(x,5);
++                x3 = pipebit0 ^ y3 ^ x5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(2, x4, x3);
++            }
++            break;
++        case ADDR_PIPECFG_P8_16x16_8x16:
++            x4 = elemIdx0;
++            y5 = _BIT(y,5);
++            x5 = _BIT(x,5);
++            x3 = pipebit1 ^ y5;
++            y4 = pipebit2 ^ x4;
++            y3 = pipebit0 ^ x5 ^ x4;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P8_16x32_8x16:
++            x3 = elemIdx0;
++            y4 = pipebit1 ^ x3;
++            y5 = _BIT(y,5);
++            x5 = _BIT(x,5);
++            x4 = pipebit2 ^ y5;
++            y3 = pipebit0 ^ x4 ^ x5;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P8_32x32_8x16:
++            x4 = elemIdx1;
++            y4 = elemIdx0 ^ x4;
++            x3 = pipebit1 ^ y4;
++            if((pitchInMacroTile % 2) == 0)
++            {  //even
++                y5 = _BIT(y,5);
++                x5 = _BIT(x,5);
++                x5 = pipebit2 ^ y5;
++                y3 = pipebit0 ^ x4 ^ x5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            else
++            {  //odd
++                x5 = _BIT(x,5);
++                y3 = pipebit0 ^ x4 ^ x5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(2, x4, x3);
++            }
++            break;
++        case ADDR_PIPECFG_P8_16x32_16x16:
++            x3 = elemIdx0;
++            x5 = _BIT(x,5);
++            y5 = _BIT(y,5);
++            x4 = pipebit2 ^ y5;
++            y4 = pipebit1 ^ x5;
++            y3 = pipebit0 ^ x3 ^ x4;
++            *pY = Bits2Number(2, y4, y3);
++            *pX = Bits2Number(2, x4, x3);
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x16:
++            x4 = elemIdx1;
++            y3 = elemIdx0 ^ x4;
++            x3 = y3^x4^pipebit0;
++            y4 = pipebit1 ^ x4;
++            if((pitchInMacroTile % 2) == 0)
++            {   //even
++                y5 = _BIT(y,5);
++                x5 = pipebit2 ^ y5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            else
++            {   //odd
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(2, x4, x3);
++            }
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x32:
++            if((pitchInMacroTile % 2) == 0)
++            {   //even
++                y5 = _BIT(y,5);
++                y6 = _BIT(y,6);
++                x4 = pipebit1 ^ y6;
++                y3 = elemIdx0 ^ x4;
++                y4 = elemIdx1 ^ x4;
++                x3 = pipebit0 ^ y3 ^ x4;
++                x5 = pipebit2 ^ y5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            else
++            {   //odd
++                y6 = _BIT(y,6);
++                x4 = pipebit1 ^ y6;
++                y3 = elemIdx0 ^ x4;
++                y4 = elemIdx1 ^ x4;
++                x3 = pipebit0 ^ y3 ^ x4;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(2, x4, x3);
++            }
++            break;
++        case ADDR_PIPECFG_P8_32x64_32x32:
++            x4 = elemIdx2;
++            y3 = elemIdx0 ^ x4;
++            y4 = elemIdx1 ^ x4;
++            if((pitchInMacroTile % 4) == 0)
++            {   //multiple of 4
++                y5 = _BIT(y,5);
++                y6 = _BIT(y,6);
++                x5 = pipebit2 ^ y6;
++                x6 = pipebit1 ^ y5;
++                x3 = pipebit0 ^ y3 ^ x5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(4, x6, x5, x4, x3);
++            }
++            else
++            {
++                y6 = _BIT(y,6);
++                x5 = pipebit2 ^ y6;
++                x3 = pipebit0 ^ y3 ^ x5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            break;
++        case ADDR_PIPECFG_P16_32x32_8x16:
++            x4 = elemIdx1;
++            y4 = elemIdx0 ^ x4;
++            y3 = pipebit0 ^ x4;
++            x3 = pipebit1 ^ y4;
++            if((pitchInMacroTile % 4) == 0)
++            {   //multiple of 4
++                y5 = _BIT(y,5);
++                y6 = _BIT(y,6);
++                x5 = pipebit2 ^ y6;
++                x6 = pipebit3 ^ y5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(4, x6, x5,x4, x3);
++            }
++            else
++            {
++                y6 = _BIT(y,6);
++                x5 = pipebit2 ^ y6;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            break;
++        case ADDR_PIPECFG_P16_32x32_16x16:
++            x4 = elemIdx1;
++            y3 = elemIdx0 ^ x4;
++            y4 = pipebit1 ^ x4;
++            x3 = pipebit0 ^ y3 ^ x4;
++            if((pitchInMacroTile % 4) == 0)
++            {   //multiple of 4
++                y5 = _BIT(y,5);
++                y6 = _BIT(y,6);
++                x5 = pipebit2 ^ y6;
++                x6 = pipebit3 ^ y5;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(4, x6, x5, x4, x3);
++            }
++            else
++            {
++                y6 = _BIT(y,6);
++                x5 = pipebit2 ^ y6;
++                *pY = Bits2Number(2, y4, y3);
++                *pX = Bits2Number(3, x5, x4, x3);
++            }
++            break;
++        default:
++            ADDR_UNHANDLED_CASE();
++    }
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::TileCoordToMaskElementIndex
++*
++*   @brief
++*       Compute element index from coordinates in tiles
++*   @return
++*       Element index
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::TileCoordToMaskElementIndex(
++    UINT_32         tx,                 ///< [in] x coord, in Tiles
++    UINT_32         ty,                 ///< [in] y coord, in Tiles
++    AddrPipeCfg     pipeConfig,         ///< [in] pipe config
++    UINT_32*        macroShift,         ///< [out] macro shift; not written for unhandled configs
++    UINT_32*        elemIdxBits         ///< [out] bit count of the element index; same caveat
++    ) const
++{
++    UINT_32 elemIdx = 0;
++    UINT_32 elemIdx0, elemIdx1, elemIdx2;
++    UINT_32 tx0, tx1;
++    UINT_32 ty0, ty1;
++
++    tx0 = _BIT(tx,0); // only bits 0 and 1 of each tile coordinate take part below
++    tx1 = _BIT(tx,1);
++    ty0 = _BIT(ty,0);
++    ty1 = _BIT(ty,1);
++
++    switch(pipeConfig)
++    {
++        case ADDR_PIPECFG_P2:
++            *macroShift = 3;
++            *elemIdxBits =3;
++            elemIdx2 = tx1;
++            elemIdx1 = tx1 ^ ty1;
++            elemIdx0 = tx1 ^ ty0;
++            elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0);
++            break;
++        case ADDR_PIPECFG_P4_8x16:
++            *macroShift = 2;
++            *elemIdxBits =2;
++            elemIdx1 = tx1;
++            elemIdx0 = tx1 ^ ty1;
++            elemIdx = Bits2Number(2,elemIdx1,elemIdx0);
++            break;
++        case ADDR_PIPECFG_P4_16x16:
++            *macroShift = 2;
++            *elemIdxBits =2;
++            elemIdx0 = tx1^ty0;
++            elemIdx1 = tx1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P4_16x32:
++            *macroShift = 2;
++            *elemIdxBits =2;
++            elemIdx0 = tx1^ty0;
++            elemIdx1 = tx1^ty1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P4_32x32:
++            *macroShift = 2;
++            *elemIdxBits =3;
++            elemIdx0 = tx1^ty0;
++            elemIdx1 = tx1^ty1;
++            elemIdx2 = tx1;
++            elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P8_16x16_8x16:
++            *macroShift = 1;
++            *elemIdxBits =1;
++            elemIdx0 = tx1;
++            elemIdx = elemIdx0;
++            break;
++        case ADDR_PIPECFG_P8_16x32_8x16:
++            *macroShift = 1;
++            *elemIdxBits =1;
++            elemIdx0 = tx0;
++            elemIdx = elemIdx0;
++            break;
++        case ADDR_PIPECFG_P8_32x32_8x16:
++            *macroShift = 1;
++            *elemIdxBits =2;
++            elemIdx1 = tx1;
++            elemIdx0 = tx1^ty1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P8_16x32_16x16:
++            *macroShift = 1;
++            *elemIdxBits =1;
++            elemIdx0 = tx0;
++            elemIdx = elemIdx0;
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x16:
++            *macroShift = 1;
++            *elemIdxBits =2;
++            elemIdx0 = tx1^ty0;
++            elemIdx1 = tx1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x32:
++            *macroShift = 1;
++            *elemIdxBits =2;
++            elemIdx0 =  tx1^ty0;
++            elemIdx1 = tx1^ty1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P8_32x64_32x32:
++            *macroShift = 1;
++            *elemIdxBits =3;
++            elemIdx0 = tx1^ty0;
++            elemIdx1 = tx1^ty1;
++            elemIdx2 = tx1;
++            elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P16_32x32_8x16:
++            *macroShift = 0;
++            *elemIdxBits =2;
++            elemIdx0 = tx1^ty1;
++            elemIdx1 = tx1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        case ADDR_PIPECFG_P16_32x32_16x16:
++            *macroShift = 0;
++            *elemIdxBits =2;
++            elemIdx0 = tx1^ty0;
++            elemIdx1 = tx1;
++            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
++            break;
++        default:
++            ADDR_UNHANDLED_CASE(); // *macroShift / *elemIdxBits are not written on this path
++            break;
++    }
++
++    return elemIdx; // still 0 when no case above matched
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeTileDataWidthAndHeightLinear
++*
++*   @brief
++*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
++*
++*   @return
++*       N/A
++*
++*   @note
++*       MacroWidth and macroHeight are measured in pixels
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlComputeTileDataWidthAndHeightLinear(
++    UINT_32*        pMacroWidth,     ///< [out] macro tile width
++    UINT_32*        pMacroHeight,    ///< [out] macro tile height
++    UINT_32         bpp,             ///< [in] bits per pixel (not referenced in this function)
++    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info; only pipeConfig is read
++    ) const
++{
++    ADDR_ASSERT(pTileInfo != NULL);
++    UINT_32 macroWidth;
++    UINT_32 macroHeight;
++
++    /// In linear mode, the htile or cmask buffer must be padded out to 4 tiles
++    /// but for P8_32x64_32x32, P16_32x32_8x16 and P8_32x32_16x16 it is padded out to 8 tiles
++    /// Actually there are more pipe configs which need 8-tile padding but SI family
++    /// has a bug which is fixed in CI family
++    if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) ||
++        (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) ||
++        (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x32_16x16))
++    {
++        macroWidth  = 8*MicroTileWidth;
++        macroHeight = 8*MicroTileHeight;
++    }
++    else
++    {
++        macroWidth  = 4*MicroTileWidth;
++        macroHeight = 4*MicroTileHeight;
++    }
++
++    *pMacroWidth    = macroWidth;
++    *pMacroHeight   = macroHeight;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeHtileBytes
++*
++*   @brief
++*       Compute htile size in bytes by forwarding every argument unchanged to ComputeHtileBytes
++*
++*   @return
++*       Htile size in bytes, exactly as returned by ComputeHtileBytes
++***************************************************************************************************
++*/
++UINT_64 SIAddrLib::HwlComputeHtileBytes(
++    UINT_32     pitch,          ///< [in] pitch
++    UINT_32     height,         ///< [in] height
++    UINT_32     bpp,            ///< [in] bits per pixel
++    BOOL_32     isLinear,       ///< [in] if it is linear mode
++    UINT_32     numSlices,      ///< [in] number of slices
++    UINT_64*    pSliceBytes,    ///< [out] bytes per slice
++    UINT_32     baseAlign       ///< [in] base alignments
++    ) const
++{
++    return ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign);
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeXmaskAddrFromCoord
++*
++*   @brief
++*       Compute address from coordinates for htile/cmask
++*   @return
++*       Byte address; the bit offset inside that byte is written to *pBitPosition
++***************************************************************************************************
++*/
++UINT_64 SIAddrLib::HwlComputeXmaskAddrFromCoord(
++    UINT_32        pitch,          ///< [in] pitch
++    UINT_32        height,         ///< [in] height
++    UINT_32        x,              ///< [in] x coord
++    UINT_32        y,              ///< [in] y coord
++    UINT_32        slice,          ///< [in] slice/depth index
++    UINT_32        numSlices,      ///< [in] number of slices
++    UINT_32        factor,         ///< [in] factor that indicates cmask(2) or htile(1)
++    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout
++    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4 (register value; unused here)
++    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4 (register value; unused here)
++    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
++    UINT_32*       pBitPosition    ///< [out] bit position inside a byte
++    ) const
++{
++    UINT_32 tx = x / MicroTileWidth;
++    UINT_32 ty = y / MicroTileHeight;
++    UINT_32 newPitch;
++    UINT_32 newHeight;
++    UINT_64 totalBytes;
++    UINT_32 macroWidth;
++    UINT_32 macroHeight;
++    UINT_64 pSliceBytes;
++    UINT_32 pBaseAlign;
++    UINT_32 tileNumPerPipe;
++    UINT_32 elemBits;
++
++    if (factor == 2) //CMASK
++    {
++        ADDR_CMASK_FLAGS flags = {{0}};
++
++        tileNumPerPipe = 256;
++
++        ComputeCmaskInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         pTileInfo,
++                         &newPitch,
++                         &newHeight,
++                         &totalBytes,
++                         &macroWidth,
++                         &macroHeight);
++        elemBits = CmaskElemBits;
++    }
++    else //HTile
++    {
++        ADDR_HTILE_FLAGS flags = {{0}};
++
++        tileNumPerPipe = 512;
++
++        ComputeHtileInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         TRUE,
++                         TRUE,
++                         pTileInfo,
++                         &newPitch,
++                         &newHeight,
++                         &totalBytes,
++                         &macroWidth,
++                         &macroHeight,
++                         &pSliceBytes,
++                         &pBaseAlign);
++        elemBits = 32;
++    }
++
++    const UINT_32 pitchInTile = newPitch / MicroTileWidth;
++    const UINT_32 heightInTile = newHeight / MicroTileHeight; // vertical extent uses the tile height
++    UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies.
++    UINT_64 microNumber; // Shifted 4x4-tile group number with elemIdx merged into its low bits.
++    UINT_32 microX;
++    UINT_32 microY;
++    UINT_64 microOffset;
++    UINT_32 microShift;
++    UINT_64 totalOffset;
++    UINT_32 elemIdxBits;
++    UINT_32 elemIdx =
++        TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, &microShift, &elemIdxBits);
++
++    UINT_32 numPipes = HwlGetPipes(pTileInfo);
++
++    if (isLinear)
++    {   //linear addressing
++        // Linear addressing wastes a lot of memory if slice > 1, since each pipe has the full
++        // slice memory foot print instead of divided by numPipes.
++        microX = tx / 4; // Macro Tile is 4x4
++        microY = ty / 4 ;
++        microNumber = static_cast<UINT_64>(microX + microY * (pitchInTile / 4)) << microShift;
++
++        UINT_32 sliceBits = pitchInTile * heightInTile;
++
++        // do htile single slice alignment if the flag is true
++        if (m_configFlags.useHtileSliceAlign && (factor == 1))  //Htile
++        {
++            sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits);
++        }
++        macroOffset = static_cast<UINT_64>(slice) * (sliceBits / numPipes) * elemBits; // widen before multiplying
++    }
++    else
++    {   //tiled addressing
++        const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles
++        const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight;
++        const UINT_32 pitchInCL = pitchInTile / macroWidthInTile;
++        const UINT_32 heightInCL = heightInTile / macroHeightInTile;
++
++        const UINT_32 macroX = x / macroWidth;
++        const UINT_32 macroY = y / macroHeight;
++        const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL;
++
++        // Per pipe starting offset of the cache line in which this tile lies.
++        microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4
++        microY = (y % macroHeight) / MicroTileHeight / 4 ;
++        microNumber = static_cast<UINT_64>(microX + microY * (macroWidth / MicroTileWidth / 4)) << microShift;
++
++        macroOffset = static_cast<UINT_64>(macroNumber) * tileNumPerPipe * elemBits; // widen before multiplying
++    }
++
++    if(elemIdxBits == microShift)
++    {
++        microNumber += elemIdx;
++    }
++    else
++    {
++        microNumber >>= elemIdxBits;
++        microNumber <<= elemIdxBits;
++        microNumber += elemIdx;
++    }
++
++    microOffset = elemBits * microNumber;
++    totalOffset = microOffset + macroOffset;
++
++    UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo);
++    UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) +
++                   pipe * (m_pipeInterleaveBytes * 8) +
++                   totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes;
++    *pBitPosition = static_cast<UINT_32>(addrInBits) % 8;
++    UINT_64 addr = addrInBits / 8;
++
++    return addr;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeXmaskCoordFromAddr
++*
++*   @brief
++*       Compute the coord from an address of a cmask/htile
++*
++*   @return
++*       N/A (results are written to *pX, *pY and *pSlice)
++*
++*   @note
++*       This method is reused by htile, so rename to Xmask
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlComputeXmaskCoordFromAddr(
++    UINT_64         addr,           ///< [in] address
++    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
++    UINT_32         pitch,          ///< [in] pitch
++    UINT_32         height,         ///< [in] height
++    UINT_32         numSlices,      ///< [in] number of slices
++    UINT_32         factor,         ///< [in] factor that indicates cmask or htile
++    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
++    BOOL_32         isWidth8,       ///< [in] Not used by SI
++    BOOL_32         isHeight8,      ///< [in] Not used by SI
++    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
++    UINT_32*        pX,             ///< [out] x coord
++    UINT_32*        pY,             ///< [out] y coord
++    UINT_32*        pSlice          ///< [out] slice index
++    ) const
++{
++    UINT_32 newPitch;
++    UINT_32 newHeight;
++    UINT_64 totalBytes;
++    UINT_32 clWidth;
++    UINT_32 clHeight;
++    UINT_32 tileNumPerPipe;
++    UINT_64 sliceBytes; // only assigned on the HTile path below
++
++    *pX = 0;
++    *pY = 0;
++    *pSlice = 0;
++
++    if (factor == 2) //CMASK
++    {
++        ADDR_CMASK_FLAGS flags = {{0}};
++
++        tileNumPerPipe = 256;
++
++        ComputeCmaskInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         pTileInfo,
++                         &newPitch,
++                         &newHeight,
++                         &totalBytes,
++                         &clWidth,
++                         &clHeight);
++    }
++    else //HTile
++    {
++        ADDR_HTILE_FLAGS flags = {{0}};
++
++        tileNumPerPipe = 512;
++
++        ComputeHtileInfo(flags,
++                         pitch,
++                         height,
++                         numSlices,
++                         isLinear,
++                         TRUE,
++                         TRUE,
++                         pTileInfo,
++                         &newPitch,
++                         &newHeight,
++                         &totalBytes,
++                         &clWidth,
++                         &clHeight,
++                         &sliceBytes);
++    }
++
++    const UINT_32 pitchInTile = newPitch / MicroTileWidth;
++    const UINT_32 heightInTile = newHeight / MicroTileHeight; // vertical extent uses the tile height
++    const UINT_32 pitchInMacroTile = pitchInTile / 4;
++    UINT_32 macroShift;
++    UINT_32 elemIdxBits;
++    // get macroShift and elemIdxBits
++    TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, &macroShift, &elemIdxBits);
++
++    const UINT_32 numPipes = HwlGetPipes(pTileInfo);
++    const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes);
++    // per pipe
++    UINT_64 localOffset = (addr % m_pipeInterleaveBytes) +
++        (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes;
++
++    UINT_32 tileIndex;
++    if (factor == 2) //CMASK
++    {
++        tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0));
++    }
++    else
++    {
++        tileIndex = (UINT_32)(localOffset / 4);
++    }
++
++    UINT_32 macroOffset;
++    if (isLinear)
++    {
++        UINT_32 sliceSizeInTile = pitchInTile * heightInTile;
++
++        // do htile single slice alignment if the flag is true
++        if (m_configFlags.useHtileSliceAlign && (factor == 1))  //Htile
++        {
++            sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast<UINT_32>(sliceBytes) / 64);
++        }
++        *pSlice = tileIndex / (sliceSizeInTile / numPipes);
++        macroOffset = tileIndex % (sliceSizeInTile / numPipes);
++    }
++    else
++    {
++        const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles
++        const UINT_32 clHeightInTile = clHeight / MicroTileHeight;
++        const UINT_32 pitchInCL = pitchInTile / clWidthInTile;
++        const UINT_32 heightInCL = heightInTile / clHeightInTile;
++        const UINT_32 clIndex = tileIndex / tileNumPerPipe;
++
++        UINT_32 clX = clIndex % pitchInCL;
++        UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL;
++
++        *pX = clX * clWidthInTile * MicroTileWidth;
++        *pY = clY * clHeightInTile * MicroTileHeight;
++        *pSlice = clIndex / (heightInCL * pitchInCL);
++
++        macroOffset = tileIndex % tileNumPerPipe;
++    }
++
++    UINT_32 elemIdx = macroOffset & 7;
++    macroOffset >>= elemIdxBits;
++
++    if (elemIdxBits != macroShift)
++    {
++        macroOffset <<= (elemIdxBits - macroShift);
++
++        UINT_32 pipebit1 = _BIT(pipe,1);
++        UINT_32 pipebit2 = _BIT(pipe,2);
++        UINT_32 pipebit3 = _BIT(pipe,3);
++        if (pitchInMacroTile % 2)
++        {   //odd
++            switch (pTileInfo->pipeConfig)
++            {
++                case ADDR_PIPECFG_P4_32x32:
++                    macroOffset |= pipebit1;
++                    break;
++                case ADDR_PIPECFG_P8_32x32_8x16:
++                case ADDR_PIPECFG_P8_32x32_16x16:
++                case ADDR_PIPECFG_P8_32x32_16x32:
++                    macroOffset |= pipebit2;
++                    break;
++                default:
++                    break;
++            }
++
++        }
++
++        if (pitchInMacroTile % 4)
++        {
++            if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)
++            {
++                macroOffset |= (pipebit1<<1);
++            }
++            if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) ||
++               (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16))
++            {
++                macroOffset |= (pipebit3<<1);
++            }
++        }
++    }
++
++    UINT_32 macroX;
++    UINT_32 macroY;
++
++    if (isLinear)
++    {
++        macroX = macroOffset % pitchInMacroTile;
++        macroY = macroOffset / pitchInMacroTile;
++    }
++    else
++    {
++        const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4);
++        macroX = macroOffset % clWidthInMacroTile;
++        macroY = macroOffset / clWidthInMacroTile;
++    }
++
++    *pX += macroX * 4 * MicroTileWidth;
++    *pY += macroY * 4 * MicroTileHeight;
++
++    UINT_32 microX;
++    UINT_32 microY;
++    ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile,
++                                       *pX, *pY, &microX, &microY);
++
++    *pX += microX * MicroTileWidth;
++    *pY += microY * MicroTileHeight; // y steps in tile heights, matching the macro step above
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlGetPitchAlignmentLinear
++*   @brief
++*       Get pitch alignment
++*   @return
++*       pitch alignment
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlGetPitchAlignmentLinear(
++    UINT_32             bpp,    ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags   ///< [in] surface flags; only the interleaved bit is read
++    ) const
++{
++    UINT_32 pitchAlign;
++
++    // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment
++    if (flags.interleaved)
++    {
++        pitchAlign = Max(64u, m_pipeInterleaveBytes / BITS_TO_BYTES(bpp)); // never below 64
++
++    }
++    else
++    {
++        pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); // never below 8
++    }
++
++    return pitchAlign;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlGetSizeAdjustmentLinear
++*
++*   @brief
++*       Adjust linear surface pitch and slice size
++*
++*   @return
++*       Logical slice size in bytes
++***************************************************************************************************
++*/
++UINT_64 SIAddrLib::HwlGetSizeAdjustmentLinear(
++    AddrTileMode        tileMode,       ///< [in] tile mode
++    UINT_32             bpp,            ///< [in] bits per pixel
++    UINT_32             numSamples,     ///< [in] number of samples
++    UINT_32             baseAlign,      ///< [in] base alignment (not referenced in this function)
++    UINT_32             pitchAlign,     ///< [in] pitch alignment
++    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
++    UINT_32*            pHeight,        ///< [in] pointer to height (only read here)
++    UINT_32*            pHeightAlign    ///< [out] height align; untouched for ADDR_TM_LINEAR_GENERAL
++    ) const
++{
++    UINT_64 sliceSize;
++    if (tileMode == ADDR_TM_LINEAR_GENERAL)
++    {
++        sliceSize = BITS_TO_BYTES(static_cast<UINT_64>(*pPitch) * (*pHeight) * bpp * numSamples);
++    }
++    else
++    {
++        UINT_32 pitch   = *pPitch;
++        UINT_32 height  = *pHeight;
++
++        UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp);
++        UINT_32 sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 64 : pixelsPerPipeInterleave;
++
++        // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value)
++        UINT_64 pixelPerSlice = static_cast<UINT_64>(pitch) * height * numSamples;
++
++        while (pixelPerSlice % sliceAlignInPixel) // grow pitch until the slice is aligned
++        {
++            pitch += pitchAlign;
++            pixelPerSlice = static_cast<UINT_64>(pitch) * height * numSamples;
++        }
++
++        *pPitch = pitch;
++
++        UINT_32 heightAlign = 1;
++
++        while ((pitch * heightAlign) % sliceAlignInPixel) // smallest row count giving an aligned size
++        {
++            heightAlign++;
++        }
++
++        *pHeightAlign = heightAlign;
++
++        sliceSize = BITS_TO_BYTES(pixelPerSlice * bpp);
++    }
++
++    return sliceSize;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlPreHandleBaseLvl3xPitch
++*
++*   @brief
++*       Pre-handler of 3x pitch (96 bit) adjustment
++*
++*   @return
++*       Expected pitch
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlPreHandleBaseLvl3xPitch(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
++    UINT_32                                 expPitch    ///< [in] pitch
++    ) const
++{
++    ADDR_ASSERT(pIn->width == expPitch);
++
++    // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so nothing to
++    // do here
++    if (!pIn->flags.pow2Pad)
++    {
++        AddrLib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch); // NOTE(review): result is not assigned - confirm intended
++    }
++    else
++    {
++        ADDR_ASSERT(IsPow2(expPitch));
++    }
++
++    return expPitch; // the local expPitch is never reassigned in this function
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlPostHandleBaseLvl3xPitch
++*
++*   @brief
++*       Post-handler of 3x pitch adjustment
++*
++*   @return
++*       Expected pitch
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlPostHandleBaseLvl3xPitch(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
++    UINT_32                                 expPitch    ///< [in] pitch
++    ) const
++{
++    /**
++     * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should
++     *  be able to compute a correct pitch from it as h/w address library is doing the job.
++     */
++    // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here
++    if (!pIn->flags.pow2Pad)
++    {
++        AddrLib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch); // NOTE(review): result is not assigned - confirm intended
++    }
++
++    return expPitch; // the local expPitch is never reassigned in this function
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlGetPitchAlignmentMicroTiled
++*
++*   @brief
++*       Compute 1D tiled surface pitch alignment
++*
++*   @return
++*       pitch alignment
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlGetPitchAlignmentMicroTiled(
++    AddrTileMode        tileMode,          ///< [in] tile mode
++    UINT_32             bpp,               ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags; qbStereo selects the path
++    UINT_32             numSamples         ///< [in] number of samples
++    ) const
++{
++    UINT_32 pitchAlign;
++
++    if (flags.qbStereo) // defer to the EgBasedAddrLib implementation for this flag
++    {
++        pitchAlign = EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled(tileMode,bpp,flags,numSamples);
++    }
++    else
++    {
++        pitchAlign = 8; // fixed value; the other arguments are not consulted on this path
++    }
++
++    return pitchAlign;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlGetSizeAdjustmentMicroTiled
++*
++*   @brief
++*       Adjust 1D tiled surface pitch and slice size
++*
++*   @return
++*       Logical slice size in bytes
++***************************************************************************************************
++*/
++UINT_64 SIAddrLib::HwlGetSizeAdjustmentMicroTiled(
++    UINT_32             thickness,      ///< [in] thickness
++    UINT_32             bpp,            ///< [in] bits per pixel
++    ADDR_SURFACE_FLAGS  flags,          ///< [in] surface flags; depth and noStencil are read
++    UINT_32             numSamples,     ///< [in] number of samples
++    UINT_32             baseAlign,      ///< [in] base alignment
++    UINT_32             pitchAlign,     ///< [in] pitch alignment
++    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
++    UINT_32*            pHeight         ///< [in] pointer to height (read only; never adjusted)
++    ) const
++{
++    UINT_64 logicalSliceSize;
++    UINT_64 physicalSliceSize;
++
++    UINT_32 pitch   = *pPitch;
++    UINT_32 height  = *pHeight;
++
++    // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1)
++    logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
++
++    // Physical slice: multiplied by thickness
++    physicalSliceSize =  logicalSliceSize * thickness;
++
++    // Pitch alignment is always 8, so if slice size is not padded to base alignment
++    // (pipe_interleave_size), we need to increase pitch
++    while ((physicalSliceSize % baseAlign) != 0)
++    {
++        pitch += pitchAlign;
++
++        logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
++
++        physicalSliceSize =  logicalSliceSize * thickness;
++    }
++
++#if !ALT_TEST
++    //
++    // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since
++    // the stencil plane may have larger pitch if the slice size is smaller than base alignment.
++    //
++    // Note: this actually does not work for mipmap but mipmap depth texture is not really
++    // sampled with mipmap.
++    //
++    if (flags.depth && !flags.noStencil)
++    {
++        ADDR_ASSERT(numSamples == 1);
++
++        UINT_64 logicalSiceSizeStencil = static_cast<UINT_64>(pitch) * height; // 1 byte stencil
++
++        while ((logicalSiceSizeStencil % baseAlign) != 0)
++        {
++            pitch += pitchAlign; // Stencil plane's pitch alignment is the same as depth plane's
++
++            logicalSiceSizeStencil = static_cast<UINT_64>(pitch) * height;
++        }
++
++        if (pitch != *pPitch) // pitch grew in either loop above
++        {
++            // If this is a mipmap, this padded one cannot be sampled as a whole mipmap!
++            logicalSliceSize = logicalSiceSizeStencil * BITS_TO_BYTES(bpp);
++        }
++    }
++#endif
++    *pPitch = pitch;
++
++    // No adjust for pHeight
++
++    return logicalSliceSize;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlConvertChipFamily
++*
++*   @brief
++*       Convert familyID defined in atiid.h to AddrChipFamily and set the per-chip m_settings flags
++*   @return
++*       AddrChipFamily; always ADDR_CHIP_FAMILY_SI (an unknown family only triggers an assert)
++***************************************************************************************************
++*/
++AddrChipFamily SIAddrLib::HwlConvertChipFamily(
++    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
++    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
++{
++    AddrChipFamily family = ADDR_CHIP_FAMILY_SI; // never reassigned below
++
++    switch (uChipFamily)
++    {
++        case FAMILY_SI:
++            m_settings.isSouthernIsland = 1;
++            m_settings.isTahiti     = ASICREV_IS_TAHITI_P(uChipRevision);
++            m_settings.isPitCairn   = ASICREV_IS_PITCAIRN_PM(uChipRevision);
++            m_settings.isCapeVerde  = ASICREV_IS_CAPEVERDE_M(uChipRevision);
++            m_settings.isOland      = ASICREV_IS_OLAND_M(uChipRevision);
++            m_settings.isHainan     = ASICREV_IS_HAINAN_V(uChipRevision);
++            break;
++        default:
++            ADDR_ASSERT(!"This should be a Fusion"); // m_settings is left untouched on this path
++            break;
++    }
++
++    return family;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlSetupTileInfo
++*
++*   @brief
++*       Setup default tile info for SI: choose a tile table index and copy its info/type to outputs
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlSetupTileInfo(
++    AddrTileMode                        tileMode,       ///< [in] Tile mode
++    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
++    UINT_32                             bpp,            ///< [in] Bits per pixel
++    UINT_32                             pitch,          ///< [in] Pitch in pixels (not used here)
++    UINT_32                             height,         ///< [in] Height in pixels (not used here)
++    UINT_32                             numSamples,     ///< [in] Number of samples
++    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input: NULL for default
++    ADDR_TILEINFO*                      pTileInfoOut,   ///< [in/out] Tile info; all-zero = use default
++    AddrTileType                        inTileType,     ///< [in] Tile type
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [out] Output (tileType and tileIndex)
++    ) const
++{
++    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
++    ADDR_TILEINFO* pTileInfo = pTileInfoOut;
++    INT index = TileIndexInvalid; // stays invalid unless a default entry is chosen below
++
++    // Fail-safe code
++    if (!IsLinear(tileMode))
++    {
++        // 128 bpp/thick tiling must be non-displayable.
++        // Fmask reuse color buffer's entry but bank-height field can be from another entry
++        // To simplify the logic, fmask entry should be picked from non-displayable ones
++        if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt)
++        {
++            inTileType = ADDR_NON_DISPLAYABLE;
++        }
++
++        if (flags.depth || flags.stencil) // checked last, so it overrides the choice above
++        {
++            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
++        }
++    }
++
++    // Partial valid fields are not allowed for SI.
++    if (IsTileInfoAllZero(pTileInfo)) // all-zero tile info: pick a default table entry
++    {
++        if (IsMacroTiled(tileMode))
++        {
++            if (flags.prt)
++            {
++                if (numSamples == 1)
++                {
++                    if (flags.depth)
++                    {
++                        switch (bpp)
++                        {
++                            case 16:
++                                index = 3;
++                                break;
++                            case 32:
++                                index = 6;
++                                break;
++                            default:
++                                ADDR_ASSERT_ALWAYS();
++                                break;
++                        }
++                    }
++                    else
++                    {
++                        switch (bpp)
++                        {
++                            case 8:
++                                index = 21;
++                                break;
++                            case 16:
++                                index = 22;
++                                break;
++                            case 32:
++                                index = 23;
++                                break;
++                            case 64:
++                                index = 24;
++                                break;
++                            case 128:
++                                index = 25;
++                                break;
++                            default: // no assert here: index keeps TileIndexInvalid
++                                break;
++                        }
++
++                        if (thickness > 1)
++                        {
++                            ADDR_ASSERT(bpp != 128);
++                            index += 5; // NOTE(review): also runs when no bpp case matched - confirm
++                        }
++                    }
++                }
++                else
++                {
++                    ADDR_ASSERT(numSamples == 4); // only 1 and 4 samples are expected for PRT
++
++                    if (flags.depth)
++                    {
++                        switch (bpp)
++                        {
++                            case 16:
++                                index = 5;
++                                break;
++                            case 32:
++                                index = 7;
++                                break;
++                            default:
++                                ADDR_ASSERT_ALWAYS();
++                                break;
++                        }
++                    }
++                    else
++                    {
++                        switch (bpp)
++                        {
++                            case 8:
++                                index = 23;
++                                break;
++                            case 16:
++                                index = 24;
++                                break;
++                            case 32:
++                                index = 25;
++                                break;
++                            case 64:
++                                index = 30;
++                                break;
++                            default:
++                                ADDR_ASSERT_ALWAYS();
++                                break;
++                        }
++                    }
++                }
++            }//end of PRT part
++            // See table entries 0-7
++            else if (flags.depth || flags.stencil)
++            {
++                if (flags.compressZ)
++                {
++                    if (flags.stencil)
++                    {
++                        index = 0;
++                    }
++                    else
++                    {
++                        // optimal tile index for compressed depth/stencil.
++                        switch (numSamples)
++                        {
++                            case 1:
++                                index = 0;
++                                break;
++                            case 2:
++                            case 4:
++                                index = 1;
++                                break;
++                            case 8:
++                                index = 2;
++                                break;
++                            default: // other sample counts leave index invalid
++                                break;
++                        }
++                    }
++                }
++                else // unCompressZ
++                {
++                    index = 3;
++                }
++            }
++            else //non PRT & non Depth & non Stencil
++            {
++                // See table entries 9-12
++                if (inTileType == ADDR_DISPLAYABLE)
++                {
++                    switch (bpp)
++                    {
++                        case 8:
++                            index = 10;
++                            break;
++                        case 16:
++                            index = 11;
++                            break;
++                        case 32:
++                            index = 12;
++                            break;
++                        case 64: // shares entry 12 with 32 bpp
++                            index = 12;
++                            break;
++                        default:
++                            break;
++                    }
++                }
++                else
++                {
++                    // See table entries 13-17
++                    if (thickness == 1)
++                    {
++                        if (flags.fmask)
++                        {
++                            UINT_32 fmaskPixelSize = bpp * numSamples; // fmask is indexed by bpp*samples
++
++                            switch (fmaskPixelSize)
++                            {
++                                case 8:
++                                    index = 14;
++                                    break;
++                                case 16:
++                                    index = 15;
++                                    break;
++                                case 32:
++                                    index = 16;
++                                    break;
++                                case 64:
++                                    index = 17;
++                                    break;
++                                default:
++                                    ADDR_ASSERT_ALWAYS();
++                            }
++                        }
++                        else
++                        {
++                            switch (bpp)
++                            {
++                                case 8:
++                                    index = 14;
++                                    break;
++                                case 16:
++                                    index = 15;
++                                    break;
++                                case 32:
++                                    index = 16;
++                                    break;
++                                case 64:
++                                    index = 17;
++                                    break;
++                                case 128: // shares entry 17 with 64 bpp
++                                    index = 17;
++                                    break;
++                                default:
++                                    break;
++                            }
++                        }
++                    }
++                    else // thick tiling - entries 18-20
++                    {
++                        switch (thickness)
++                        {
++                            case 4:
++                                index = 20;
++                                break;
++                            case 8:
++                                index = 19;
++                                break;
++                            default:
++                                break;
++                        }
++                    }
++                }
++            }
++        }
++        else // not macro tiled: linear or micro-tiled modes
++        {
++            if (tileMode == ADDR_TM_LINEAR_ALIGNED)
++            {
++                index = 8;
++            }
++            else if (tileMode == ADDR_TM_LINEAR_GENERAL)
++            {
++                index = TileIndexLinearGeneral;
++            }
++            else
++            {
++                if (flags.depth || flags.stencil)
++                {
++                    index = 4;
++                }
++                else if (inTileType == ADDR_DISPLAYABLE)
++                {
++                    index = 9;
++                }
++                else if (thickness == 1)
++                {
++                    index = 13;
++                }
++                else
++                {
++                    index = 18;
++                }
++            }
++        }
++
++        if (index >= 0 && index <= 31) // only indices 0..31 are looked up in m_tileTable
++        {
++            *pTileInfo      = m_tileTable[index].info;
++            pOut->tileType  = m_tileTable[index].type;
++        }
++
++        if (index == TileIndexLinearGeneral) // borrows entry 8, the linear-aligned entry
++        {
++            *pTileInfo      = m_tileTable[8].info;
++            pOut->tileType  = m_tileTable[8].type;
++        }
++    }
++    else // tile info already has non-zero fields: keep it, index stays TileIndexInvalid
++    {
++        if (pTileInfoIn)
++        {
++            if (flags.stencil && pTileInfoIn->tileSplitBytes == 0)
++            {
++                // Stencil always uses index 0
++                *pTileInfo = m_tileTable[0].info;
++            }
++        }
++        // Pass through tile type
++        pOut->tileType = inTileType;
++    }
++
++    pOut->tileIndex = index; // reported on every path, including TileIndexInvalid
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::DecodeGbRegs
++*
++*   @brief
++*       Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks into the pipe-interleave/row/bank members
++*
++*   @return
++*       TRUE if all settings are valid (every decoded field matched a recognized value)
++*
++***************************************************************************************************
++*/
++BOOL_32 SIAddrLib::DecodeGbRegs(
++    const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input
++{
++    GB_ADDR_CONFIG  reg;
++    BOOL_32         valid = TRUE; // cleared by any unrecognized field; decoding still continues
++
++    reg.val = pRegValue->gbAddrConfig;
++
++    switch (reg.f.pipe_interleave_size)
++    {
++        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
++            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
++            break;
++        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
++            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
++            break;
++        default:
++            valid = FALSE;
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++
++    switch (reg.f.row_size)
++    {
++        case ADDR_CONFIG_1KB_ROW:
++            m_rowSize = ADDR_ROWSIZE_1KB;
++            break;
++        case ADDR_CONFIG_2KB_ROW:
++            m_rowSize = ADDR_ROWSIZE_2KB;
++            break;
++        case ADDR_CONFIG_4KB_ROW:
++            m_rowSize = ADDR_ROWSIZE_4KB;
++            break;
++        default:
++            valid = FALSE;
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++
++    switch (pRegValue->noOfBanks) // encoded value 0/1/2 maps to 4/8/16 banks
++    {
++        case 0:
++            m_banks = 4;
++            break;
++        case 1:
++            m_banks = 8;
++            break;
++        case 2:
++            m_banks = 16;
++            break;
++        default:
++            valid = FALSE;
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++
++    switch (pRegValue->noOfRanks) // encoded value 0/1 maps to 1/2 ranks
++    {
++        case 0:
++            m_ranks = 1;
++            break;
++        case 1:
++            m_ranks = 2;
++            break;
++        default:
++            valid = FALSE;
++            ADDR_UNHANDLED_CASE();
++            break;
++    }
++
++    m_logicalBanks = m_banks * m_ranks; // computed even when valid is FALSE
++
++    ADDR_ASSERT(m_logicalBanks <= 16);
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlInitGlobalParams
++*
++*   @brief
++*       Initializes global parameters: decodes GB registers, sets m_pipes, loads the tile table
++*
++*   @return
++*       TRUE if all settings are valid (register decode and tile table init both succeeded)
++*
++***************************************************************************************************
++*/
++BOOL_32 SIAddrLib::HwlInitGlobalParams(
++    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
++{
++    BOOL_32 valid = TRUE; // immediately replaced by the DecodeGbRegs result below
++    const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue;
++
++    valid = DecodeGbRegs(pRegValue);
++
++    if (valid) // pipes, tile table and m_maxSamples are only set up after a successful decode
++    {
++        if (m_settings.isTahiti || m_settings.isPitCairn)
++        {
++            m_pipes = 8;
++        }
++        else if (m_settings.isCapeVerde || m_settings.isOland)
++        {
++            m_pipes = 4;
++        }
++        else
++        {
++            // Hainan is 2-pipe (m_settings.isHainan == 1)
++            m_pipes = 2;
++        }
++
++        valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries);
++
++        m_maxSamples = 16; // assigned regardless of the InitTileSettingTable result
++    }
++
++    return valid;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlConvertTileInfoToHW
++*   @brief
++*       Entry of si's ConvertTileInfoToHW; offsets pipeConfig by one after the base conversion
++*   @return
++*       ADDR_E_RETURNCODE (base class result, or ADDR_INVALIDPARAMS for an invalid pipeConfig)
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE SIAddrLib::HwlConvertTileInfoToHW(
++    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
++    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
++    ) const
++{
++    ADDR_E_RETURNCODE retCode   = ADDR_OK;
++
++    retCode = EgBasedAddrLib::HwlConvertTileInfoToHW(pIn, pOut);
++
++    if (retCode == ADDR_OK) // pipeConfig is only touched when the base conversion succeeded
++    {
++        if (pIn->reverse == FALSE) // forward direction: subtract one from the input pipeConfig
++        {
++            if (pIn->pTileInfo->pipeConfig == ADDR_PIPECFG_INVALID)
++            {
++                retCode = ADDR_INVALIDPARAMS;
++            }
++            else
++            {
++                pOut->pTileInfo->pipeConfig =
++                    static_cast<AddrPipeCfg>(pIn->pTileInfo->pipeConfig - 1);
++            }
++        }
++        else // reverse direction: add one; no validity check is made on this path
++        {
++            pOut->pTileInfo->pipeConfig =
++                static_cast<AddrPipeCfg>(pIn->pTileInfo->pipeConfig + 1);
++        }
++    }
++
++    return retCode;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeXmaskCoordYFrom8Pipe
++*
++*   @brief
++*       Compute the Y coord which will be added to Xmask Y
++*       coord. Placeholder on SI: it asserts unconditionally and ignores both parameters.
++*   @return
++*       Y coord (always 0 in this implementation)
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlComputeXmaskCoordYFrom8Pipe(
++    UINT_32         pipe,       ///< [in] pipe id
++    UINT_32         x           ///< [in] tile coord x, which is original x coord / 8
++    ) const
++{
++    // This function should never be called since it is 6xx/8xx specific.
++    // Keep this empty implementation to avoid any mis-use.
++    ADDR_ASSERT_ALWAYS();
++
++    return 0;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeSurfaceCoord2DFromBankPipe
++*
++*   @brief
++*       Compute surface x,y coordinates from bank/pipe info; offsets are added to *pX and *pY
++*   @return
++*       N/A
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlComputeSurfaceCoord2DFromBankPipe(
++    AddrTileMode        tileMode,   ///< [in] tile mode
++    UINT_32*            pX,         ///< [in/out] x coordinate
++    UINT_32*            pY,         ///< [in/out] y coordinate
++    UINT_32             slice,      ///< [in] slice index
++    UINT_32             bank,       ///< [in] bank number
++    UINT_32             pipe,       ///< [in] pipe number
++    UINT_32             bankSwizzle,///< [in] bank swizzle
++    UINT_32             pipeSwizzle,///< [in] pipe swizzle
++    UINT_32             tileSlices, ///< [in] slices in a micro tile
++    BOOL_32             ignoreSE,   ///< [in] TRUE if shader engines are ignored (not read here)
++    ADDR_TILEINFO*      pTileInfo   ///< [in] bank structure. **All fields to be valid on entry**
++    ) const
++{
++    UINT_32 xBit;
++    UINT_32 yBit;
++    UINT_32 yBit3 = 0;
++    UINT_32 yBit4 = 0;
++    UINT_32 yBit5 = 0;
++    UINT_32 yBit6 = 0;
++
++    UINT_32 xBit3 = 0;
++    UINT_32 xBit4 = 0;
++    UINT_32 xBit5 = 0;
++
++    UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig);
++
++    CoordFromBankPipe xyBits = {0};
++    ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe,
++                                      bankSwizzle, pipeSwizzle, tileSlices, pTileInfo,
++                                      &xyBits);
++    yBit3 = xyBits.yBit3;
++    yBit4 = xyBits.yBit4;
++    yBit5 = xyBits.yBit5;
++    yBit6 = xyBits.yBit6;
++
++    xBit3 = xyBits.xBit3;
++    xBit4 = xyBits.xBit4;
++    xBit5 = xyBits.xBit5;
++
++    yBit = xyBits.yBits; // combined value; only sampled for yBitTemp, then overwritten below
++
++    UINT_32 yBitTemp = 0;
++
++    if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) ||
++        (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))
++    {
++        ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1);
++        UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1;
++
++        ADDR_ASSERT(yBitToCheck <= 3);
++
++        yBitTemp = _BIT(yBit, yBitToCheck);
++
++        xBit3 = 0; // these two configs derive the low x bits from the pipe/bank bits below
++    }
++
++    yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3);
++    xBit = Bits2Number(3, xBit5, xBit4, xBit3);
++
++    *pY += yBit * pTileInfo->bankHeight * MicroTileHeight;
++    *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth;
++
++    //calculate the bank and pipe bits in x, y
++    UINT_32 xTile; //x in micro tile
++    UINT_32 x3 = 0;
++    UINT_32 x4 = 0;
++    UINT_32 x5 = 0;
++    UINT_32 x6 = 0;
++    UINT_32 y = *pY; // uses the y value already updated above
++
++    UINT_32 pipeBit0 = _BIT(pipe,0);
++    UINT_32 pipeBit1 = _BIT(pipe,1);
++    UINT_32 pipeBit2 = _BIT(pipe,2);
++
++    UINT_32 y3 = _BIT(y, 3);
++    UINT_32 y4 = _BIT(y, 4);
++    UINT_32 y5 = _BIT(y, 5);
++    UINT_32 y6 = _BIT(y, 6);
++
++    // bankbit0 after ^x4^x5
++    UINT_32 bankBit00 = _BIT(bank,0);
++    UINT_32 bankBit0 = 0;
++
++    switch (pTileInfo->pipeConfig) // each case recovers x bits by XOR-ing pipe bits with y bits
++    {
++        case ADDR_PIPECFG_P2:
++            x3 = pipeBit0 ^ y3;
++            break;
++        case ADDR_PIPECFG_P4_8x16:
++            x4 = pipeBit0 ^ y3;
++            x3 = pipeBit0 ^ y4;
++            break;
++        case ADDR_PIPECFG_P4_16x16:
++            x4 = pipeBit1 ^ y4;
++            x3 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P4_16x32:
++            x4 = pipeBit1 ^ y4;
++            x3 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P4_32x32:
++            x5 = pipeBit1 ^ y5;
++            x3 = pipeBit0 ^ y3 ^ x5;
++            bankBit0 = yBitTemp ^ x5;
++            x4 = bankBit00 ^ x5 ^ bankBit0;
++            *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8;
++            break;
++        case ADDR_PIPECFG_P8_16x16_8x16:
++            x3 = pipeBit1 ^ y5;
++            x4 = pipeBit2 ^ y4;
++            x5 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P8_16x32_8x16:
++            x3 = pipeBit1 ^ y4;
++            x4 = pipeBit2 ^ y5;
++            x5 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P8_32x32_8x16:
++            x3 = pipeBit1 ^ y4;
++            x5 = pipeBit2 ^ y5;
++            x4 = pipeBit0 ^ y3 ^ x5;
++            break;
++        case ADDR_PIPECFG_P8_16x32_16x16:
++            x4 = pipeBit2 ^ y5;
++            x5 = pipeBit1 ^ y4;
++            x3 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x16:
++            x5 = pipeBit2 ^ y5;
++            x4 = pipeBit1 ^ y4;
++            x3 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P8_32x32_16x32:
++            x5 = pipeBit2 ^ y5;
++            x4 = pipeBit1 ^ y6;
++            x3 = pipeBit0 ^ y3 ^ x4;
++            break;
++        case ADDR_PIPECFG_P8_32x64_32x32:
++            x6 = pipeBit1 ^ y5;
++            x5 = pipeBit2 ^ y6;
++            x3 = pipeBit0 ^ y3 ^ x5;
++            bankBit0 = yBitTemp ^ x6;
++            x4 = bankBit00 ^ x5 ^ bankBit0;
++            *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8;
++            break;
++        default: // unknown pipe config: assert only, x3..x5 stay 0
++            ADDR_ASSERT_ALWAYS();
++    }
++
++    xTile = Bits2Number(3, x5, x4, x3);
++
++    *pX += xTile << 3; // shift by 3: tile index to x offset (x8)
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlPreAdjustBank
++*
++*   @brief
++*       Adjust bank before calculating address according to bank/pipe
++*   @return
++*       Adjusted bank; the input bank is returned unchanged unless the condition below holds
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlPreAdjustBank(
++    UINT_32         tileX,      ///< [in] x coordinate in unit of tile
++    UINT_32         bank,       ///< [in] bank
++    ADDR_TILEINFO*  pTileInfo   ///< [in] tile info (pipeConfig, bankWidth, macroAspectRatio read)
++    ) const
++{
++    if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) ||
++        (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && (pTileInfo->bankWidth == 1))
++    {
++        UINT_32 bankBit0 = _BIT(bank, 0);
++        UINT_32 x4 = _BIT(tileX, 1); // bit 1 of the tile x coordinate
++        UINT_32 x5 = _BIT(tileX, 2); // bit 2 of the tile x coordinate
++
++        bankBit0 = bankBit0 ^ x4 ^ x5;
++        bank |= bankBit0; // NOTE(review): OR can only set bit 0, never clear it - confirm intended
++
++        ADDR_ASSERT(pTileInfo->macroAspectRatio > 1); // terminated explicitly, not by the macro
++    }
++
++    return bank;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeSurfaceInfo
++*
++*   @brief
++*       Entry of si's ComputeSurfaceInfo; seeds pOut->tileIndex, then defers to EgBasedAddrLib
++*   @return
++*       ADDR_E_RETURNCODE (the value returned by EgBasedAddrLib::HwlComputeSurfaceInfo)
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE SIAddrLib::HwlComputeSurfaceInfo(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
++    ) const
++{
++    pOut->tileIndex = pIn->tileIndex; // copied before the base class runs
++
++    return EgBasedAddrLib::HwlComputeSurfaceInfo(pIn,pOut);
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeMipLevel
++*   @brief
++*       Compute MipLevel info (including level 0); sub-level width is derived from basePitch
++*   @return
++*       TRUE if HWL's handled (this implementation always returns TRUE)
++***************************************************************************************************
++*/
++BOOL_32 SIAddrLib::HwlComputeMipLevel(
++    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure
++    ) const
++{
++    // basePitch is calculated from level 0 so we only check this for mipLevel > 0
++    if (pIn->mipLevel > 0)
++    {
++        // Note: Don't check expand 3x formats(96 bit) as the basePitch is not pow2 even if
++        // we explicitly set pow2Pad flag. The 3x base pitch is padded to pow2 but after being
++        // divided by expandX factor (3) - to program texture pitch, the basePitch is never pow2.
++        if (!AddrElemLib::IsExpand3x(pIn->format))
++        {
++            // Sublevel pitches are generated from base level pitch instead of width on SI
++            // If pow2Pad is 0, we don't assert - as this is not really used for a mip chain
++            ADDR_ASSERT(!pIn->flags.pow2Pad || ((pIn->basePitch != 0) && IsPow2(pIn->basePitch)));
++        }
++
++        if (pIn->basePitch != 0) // with basePitch == 0 the caller's width is left untouched
++        {
++            pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel); // clamp to at least 1
++        }
++    }
++
++    // pow2Pad is done in PostComputeMipLevel
++
++    return TRUE;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlCheckLastMacroTiledLvl
++*
++*   @brief
++*       Sets pOut->last2DLevel to TRUE if the next mip level's tile mode would be micro tiled
++*   @note
++*       Only evaluated when pIn->flags.pow2Pad is set; otherwise pOut is left untouched
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlCheckLastMacroTiledLvl(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut      ///< [in/out] Output structure (used as input, too)
++    ) const
++{
++    // pow2Pad covers all mipmap cases
++    if (pIn->flags.pow2Pad)
++    {
++        ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
++
++        UINT_32 nextPitch;
++        UINT_32 nextHeight;
++        UINT_32 nextSlices;
++
++        AddrTileMode nextTileMode;
++
++        if (pIn->mipLevel == 0 || pIn->basePitch == 0)
++        {
++            // Base level or fail-safe case (basePitch == 0)
++            nextPitch = pOut->pitch >> 1;
++        }
++        else
++        {
++            // Sub levels
++            nextPitch = pIn->basePitch >> (pIn->mipLevel + 1);
++        }
++
++        // nextHeight must be shifted from this level's original height rather than a pow2 padded
++        // one but this requires original height stored somewhere (pOut->height)
++        ADDR_ASSERT(pOut->height != 0);
++
++        // next level's height is just current level's >> 1 in pixels
++        nextHeight = pOut->height >> 1;
++        // Special format such as FMT_1 and FMT_32_32_32 can be linear only so we consider block
++        // compressed formats
++        if (AddrElemLib::IsBlockCompressed(pIn->format))
++        {
++            nextHeight = (nextHeight + 3) / 4; // round up to a whole number of 4-row blocks
++        }
++        nextHeight = NextPow2(nextHeight);
++
++        // A plain >> 1 would give 0 slices when this level has 1, so clamp to at least 1
++        if (pIn->flags.volume)
++        {
++            nextSlices = Max(1u, pIn->numSlices >> 1);
++        }
++        else
++        {
++            nextSlices = pIn->numSlices;
++        }
++
++        nextTileMode = ComputeSurfaceMipLevelTileMode(pIn->tileMode,
++                                                      pIn->bpp,
++                                                      nextPitch,
++                                                      nextHeight,
++                                                      nextSlices,
++                                                      pIn->numSamples,
++                                                      pOut->pitchAlign,
++                                                      pOut->heightAlign,
++                                                      pOut->pTileInfo);
++
++        pOut->last2DLevel = IsMicroTiled(nextTileMode); // set from the predicted next-level mode
++    }
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlDegradeThickTileMode
++*
++*   @brief
++*       Degrades valid tile mode for thick modes if needed; SI forwards to EgBasedAddrLib as is
++*
++*   @return
++*       Suitable tile mode (whatever EgBasedAddrLib::HwlDegradeThickTileMode returns)
++***************************************************************************************************
++*/
++AddrTileMode SIAddrLib::HwlDegradeThickTileMode(
++    AddrTileMode        baseTileMode,   ///< [in] base tile mode
++    UINT_32             numSlices,      ///< [in] current number of slices
++    UINT_32*            pBytesPerTile   ///< [in/out] pointer to bytes per slice
++    ) const
++{
++    return EgBasedAddrLib::HwlDegradeThickTileMode(baseTileMode, numSlices, pBytesPerTile);
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlTileInfoEqual
++*
++*   @brief
++*       Compare two tile infos: pipeConfig first, then the fields checked by EgBasedAddrLib
++*   @note
++*       Returns FALSE without consulting the base class when pipeConfig differs
++***************************************************************************************************
++*/
++BOOL_32 SIAddrLib::HwlTileInfoEqual(
++    const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand
++    const ADDR_TILEINFO* pRight ///<[in] Right compare operand
++    ) const
++{
++    // pipeConfig is the field compared at this level; a mismatch decides
++    // the result immediately and the base class comparison is skipped.
++    if (pLeft->pipeConfig != pRight->pipeConfig)
++    {
++        return FALSE;
++    }
++
++    return EgBasedAddrLib::HwlTileInfoEqual(pLeft, pRight);
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::GetTileSetting
++*
++*   @brief
++*       Get tile setting info by index.
++*   @return
++*       Tile setting info.
++***************************************************************************************************
++*/
++const ADDR_TILECONFIG* SIAddrLib::GetTileSetting(
++    UINT_32 index          ///< [in] Tile index
++    ) const
++{
++    ADDR_ASSERT(index < m_noOfEntries);
++    return &m_tileTable[index];
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlPostCheckTileIndex
++*
++*   @brief
++*       Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches
++*       tile mode/type/info and change the index if needed
++*   @return
++*       Tile index.
++***************************************************************************************************
++*/
++INT_32 SIAddrLib::HwlPostCheckTileIndex(
++    const ADDR_TILEINFO* pInfo,     ///< [in] Tile Info
++    AddrTileMode         mode,      ///< [in] Tile mode
++    AddrTileType         type,      ///< [in] Tile type
++    INT                  curIndex   ///< [in] Current index assigned in HwlSetupTileInfo
++    ) const
++{
++    INT_32 index = curIndex;
++
++    if (mode == ADDR_TM_LINEAR_GENERAL)
++    {
++        index = TileIndexLinearGeneral;
++    }
++    else
++    {
++        BOOL_32 macroTiled = IsMacroTiled(mode);
++
++        // We need to find a new index if either of them is true
++        // 1. curIndex is invalid
++        // 2. tile mode is changed
++        // 3. tile info does not match for macro tiled
++        if ((index == TileIndexInvalid         ||
++            (mode != m_tileTable[index].mode)  ||
++            (macroTiled && !HwlTileInfoEqual(pInfo, &m_tileTable[index].info))))
++        {
++            for (index = 0; index < static_cast<INT_32>(m_noOfEntries); index++)
++            {
++                if (macroTiled)
++                {
++                    // macro tile modes need all to match
++                    if (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) &&
++                        (mode == m_tileTable[index].mode)                 &&
++                        (type == m_tileTable[index].type))
++                    {
++                        break;
++                    }
++                }
++                else if (mode == ADDR_TM_LINEAR_ALIGNED)
++                {
++                    // linear mode only needs tile mode to match
++                    if (mode == m_tileTable[index].mode)
++                    {
++                        break;
++                    }
++                }
++                else
++                {
++                    // micro tile modes only need tile mode and tile type to match
++                    if (mode == m_tileTable[index].mode &&
++                        type == m_tileTable[index].type)
++                    {
++                        break;
++                    }
++                }
++            }
++        }
++    }
++
++    ADDR_ASSERT(index < static_cast<INT_32>(m_noOfEntries));
++
++    if (index >= static_cast<INT_32>(m_noOfEntries))
++    {
++        index = TileIndexInvalid;
++    }
++
++    return index;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlSetupTileCfg
++*
++*   @brief
++*       Map tile index to tile setting.
++*   @return
++*       ADDR_E_RETURNCODE
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE SIAddrLib::HwlSetupTileCfg(
++    INT_32          index,          ///< [in] Tile index
++    INT_32          macroModeIndex, ///< [in] Index in macro tile mode table(CI)
++    ADDR_TILEINFO*  pInfo,          ///< [out] Tile Info
++    AddrTileMode*   pMode,          ///< [out] Tile mode
++    AddrTileType*   pType          ///< [out] Tile type
++    ) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    // Global flag to control usage of tileIndex
++    if (UseTileIndex(index))
++    {
++        if (index == TileIndexLinearGeneral)
++        {
++            if (pMode)
++            {
++                *pMode = ADDR_TM_LINEAR_GENERAL;
++            }
++
++            if (pType)
++            {
++                *pType = ADDR_DISPLAYABLE;
++            }
++
++            if (pInfo)
++            {
++                pInfo->banks = 2;
++                pInfo->bankWidth = 1;
++                pInfo->bankHeight = 1;
++                pInfo->macroAspectRatio = 1;
++                pInfo->tileSplitBytes = 64;
++                pInfo->pipeConfig = ADDR_PIPECFG_P2;
++            }
++        }
++        else if (static_cast<UINT_32>(index) >= m_noOfEntries)
++        {
++            returnCode = ADDR_INVALIDPARAMS;
++        }
++        else
++        {
++            const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index);
++
++            if (pInfo)
++            {
++                *pInfo = pCfgTable->info;
++            }
++            else
++            {
++                if (IsMacroTiled(pCfgTable->mode))
++                {
++                    returnCode = ADDR_INVALIDPARAMS;
++                }
++            }
++
++            if (pMode)
++            {
++                *pMode = pCfgTable->mode;
++            }
++
++            if (pType)
++            {
++                *pType = pCfgTable->type;
++            }
++        }
++    }
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::ReadGbTileMode
++*
++*   @brief
++*       Convert GB_TILE_MODE HW value to ADDR_TILECONFIG.
++*   @return
++*       NA.
++***************************************************************************************************
++*/
++VOID SIAddrLib::ReadGbTileMode(
++    UINT_32             regValue,   ///< [in] GB_TILE_MODE register
++    ADDR_TILECONFIG*    pCfg        ///< [out] output structure
++    ) const
++{
++    GB_TILE_MODE gbTileMode;
++    gbTileMode.val = regValue;
++
++    pCfg->type = static_cast<AddrTileType>(gbTileMode.f.micro_tile_mode);
++    pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height;
++    pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width;
++    pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1);
++    pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect;
++    pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split;
++    pCfg->info.pipeConfig = static_cast<AddrPipeCfg>(gbTileMode.f.pipe_config + 1);
++
++    UINT_32 regArrayMode = gbTileMode.f.array_mode;
++
++    pCfg->mode = static_cast<AddrTileMode>(regArrayMode);
++
++    if (regArrayMode == 8) //ARRAY_2D_TILED_XTHICK
++    {
++        pCfg->mode = ADDR_TM_2D_TILED_XTHICK;
++    }
++    else if (regArrayMode >= 14) //ARRAY_3D_TILED_XTHICK
++    {
++        pCfg->mode = static_cast<AddrTileMode>(pCfg->mode + 3);
++    }
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::InitTileSettingTable
++*
++*   @brief
++*       Initialize the ADDR_TILECONFIG table.
++*   @return
++*       TRUE if tile table is correctly initialized
++***************************************************************************************************
++*/
++BOOL_32 SIAddrLib::InitTileSettingTable(
++    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
++    UINT_32         noOfEntries     ///< [in] Number of entries in the table above
++    )
++{
++    BOOL_32 initOk = TRUE;
++
++    ADDR_ASSERT(noOfEntries <= TileTableSize);
++    memset(m_tileTable, 0, sizeof(m_tileTable));
++
++    // 0 selects the full table; an oversized count is clamped so m_tileTable is never overrun
++    if ((noOfEntries != 0) && (noOfEntries <= TileTableSize))
++    {
++        m_noOfEntries = noOfEntries;
++    }
++    else
++    {
++        m_noOfEntries = TileTableSize;
++    }
++
++    if (pCfg) // From Client
++    {
++        for (UINT_32 i = 0; i < m_noOfEntries; i++)
++        {
++            ReadGbTileMode(*(pCfg + i), &m_tileTable[i]);
++        }
++    }
++    else
++    {
++        ADDR_ASSERT_ALWAYS();
++        initOk = FALSE;
++    }
++
++    if (initOk)
++    {
++        ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED);
++    }
++
++    return initOk;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlGetTileIndex
++*
++*   @brief
++*       Return the virtual/real index for given mode/type/info
++*   @return
++*       ADDR_OK if successful.
++***************************************************************************************************
++*/
++ADDR_E_RETURNCODE SIAddrLib::HwlGetTileIndex(
++    const ADDR_GET_TILEINDEX_INPUT* pIn,
++    ADDR_GET_TILEINDEX_OUTPUT*      pOut) const
++{
++    ADDR_E_RETURNCODE returnCode = ADDR_OK;
++
++    pOut->index = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType);
++
++    return returnCode;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlFmaskPreThunkSurfInfo
++*
++*   @brief
++*       Some preparation before thunking a ComputeSurfaceInfo call for Fmask
++*   @return
++*       NA.
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlFmaskPreThunkSurfInfo(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pFmaskIn,   ///< [in] Input of fmask info
++    const ADDR_COMPUTE_FMASK_INFO_OUTPUT*   pFmaskOut,  ///< [in] Output of fmask info
++    ADDR_COMPUTE_SURFACE_INFO_INPUT*        pSurfIn,    ///< [out] Input of thunked surface info
++    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pSurfOut    ///< [out] Output of thunked surface info
++    ) const
++{
++    pSurfIn->tileIndex = pFmaskIn->tileIndex;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlFmaskPostThunkSurfInfo
++*
++*   @brief
++*       Copy hwl extra field after calling thunked ComputeSurfaceInfo
++*   @return
++*       NA.
++***************************************************************************************************
++*/
++VOID SIAddrLib::HwlFmaskPostThunkSurfInfo(
++    const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,   ///< [in] Output of surface info
++    ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut           ///< [out] Output of fmask info
++    ) const
++{
++    pFmaskOut->macroModeIndex = TileIndexInvalid;
++    pFmaskOut->tileIndex = pSurfOut->tileIndex;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlComputeFmaskBits
++*   @brief
++*       Computes fmask bits
++*   @return
++*       Fmask bits
++***************************************************************************************************
++*/
++UINT_32 SIAddrLib::HwlComputeFmaskBits(
++    const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++    UINT_32* pNumSamples
++    ) const
++{
++    UINT_32 numSamples = pIn->numSamples;
++    UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags);
++    UINT_32 bpp;
++
++    if (numFrags != numSamples) // EQAA
++    {
++        ADDR_ASSERT(numFrags <= 8);
++
++        if (!pIn->resolved)
++        {
++            if (numFrags == 1)
++            {
++                bpp          = 1;
++                numSamples   = numSamples == 16 ? 16 : 8;
++            }
++            else if (numFrags == 2)
++            {
++                ADDR_ASSERT(numSamples >= 4);
++
++                bpp          = 2;
++                // numSamples is reported unchanged
++            }
++            else if (numFrags == 4)
++            {
++                ADDR_ASSERT(numSamples >= 4);
++
++                bpp          = 4;
++                // numSamples is reported unchanged
++            }
++            else // numFrags == 8
++            {
++                ADDR_ASSERT(numSamples == 16);
++
++                bpp          = 4;
++                // numSamples is reported unchanged
++            }
++        }
++        else
++        {
++            if (numFrags == 1)
++            {
++                bpp          = (numSamples == 16) ? 16 : 8;
++                numSamples   = 1;
++            }
++            else if (numFrags == 2)
++            {
++                ADDR_ASSERT(numSamples >= 4);
++
++                bpp          = numSamples*2;
++                numSamples   = 1;
++            }
++            else if (numFrags == 4)
++            {
++                ADDR_ASSERT(numSamples >= 4);
++
++                bpp          = numSamples*4;
++                numSamples   = 1;
++            }
++            else // numFrags == 8
++            {
++                ADDR_ASSERT(numSamples >= 16);
++
++                bpp          = 16*4;
++                numSamples   = 1;
++            }
++        }
++    }
++    else // Normal AA
++    {
++        if (!pIn->resolved)
++        {
++            bpp          = ComputeFmaskNumPlanesFromNumSamples(numSamples);
++            numSamples   = numSamples == 2 ? 8 : numSamples;
++        }
++        else
++        {
++            // The same as 8XX
++            bpp          = ComputeFmaskResolvedBppFromNumSamples(numSamples);
++            numSamples   = 1; // 1x sample
++        }
++    }
++
++    // Report the (possibly adjusted) sample count through the optional out-pointer
++    SafeAssign(pNumSamples, numSamples);
++    return bpp;
++}
++
++/**
++***************************************************************************************************
++*   SIAddrLib::HwlOverrideTileMode
++*
++*   @brief
++*       Override tile modes (for PRT only), so the client cannot pass in a PRT mode invalid for SI.
++*
++*   @return
++*       TRUE if the tile mode was overridden
++*
++***************************************************************************************************
++*/
++BOOL_32 SIAddrLib::HwlOverrideTileMode(
++    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,       ///< [in] input structure
++    AddrTileMode*                           pTileMode, ///< [in/out] pointer to the tile mode
++    AddrTileType*                           pTileType  ///< [in/out] pointer to the tile type
++    ) const
++{
++    BOOL_32 bOverrided = FALSE;
++    AddrTileMode tileMode = *pTileMode;
++
++    switch (tileMode)
++    {
++        case ADDR_TM_PRT_TILED_THIN1:
++            tileMode    = ADDR_TM_2D_TILED_THIN1;
++            break;
++
++        case ADDR_TM_PRT_TILED_THICK:
++            tileMode    = ADDR_TM_2D_TILED_THICK;
++            break;
++
++        case ADDR_TM_PRT_2D_TILED_THICK:
++            tileMode    = ADDR_TM_2D_TILED_THICK;
++            break;
++
++        case ADDR_TM_PRT_3D_TILED_THICK:
++            tileMode    = ADDR_TM_3D_TILED_THICK;
++            break;
++
++        default:
++            break;
++    }
++
++    if (tileMode != *pTileMode)
++    {
++        *pTileMode = tileMode;
++        bOverrided = TRUE;
++        ADDR_ASSERT(pIn->flags.prt == TRUE);
++    }
++
++    return bOverrided;
++}
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.h b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.h
+new file mode 100644
+index 0000000..897beb1
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/addrlib/r800/siaddrlib.h
+@@ -0,0 +1,262 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++***************************************************************************************************
++* @file  siaddrlib.h
++* @brief Contains the SIAddrLib class definition.
++***************************************************************************************************
++*/
++
++#ifndef __SI_ADDR_LIB_H__
++#define __SI_ADDR_LIB_H__
++
++#include "addrlib.h"
++#include "egbaddrlib.h"
++
++/**
++***************************************************************************************************
++* @brief Describes the information in tile mode table
++***************************************************************************************************
++*/
++struct ADDR_TILECONFIG
++{
++    AddrTileMode  mode;
++    AddrTileType  type;
++    ADDR_TILEINFO info;
++};
++
++/**
++***************************************************************************************************
++* @brief SI specific settings structure.
++***************************************************************************************************
++*/
++struct SIChipSettings
++{
++    struct
++    {
++        UINT_32 isSouthernIsland    : 1;
++        UINT_32 isTahiti            : 1;
++        UINT_32 isPitCairn          : 1;
++        UINT_32 isCapeVerde         : 1;
++        /// Oland/Hainan are of GFXIP 6.0, similar with SI
++        UINT_32 isOland             : 1;
++        UINT_32 isHainan            : 1;
++    };
++};
++
++/**
++***************************************************************************************************
++* @brief This class is the SI specific address library
++*        function set.
++***************************************************************************************************
++*/
++class SIAddrLib : public EgBasedAddrLib
++{
++public:
++    /// Creates SIAddrLib object
++    static AddrLib* CreateObj(const AddrClient* pClient)
++    {
++        return new(pClient) SIAddrLib(pClient);
++    }
++
++protected:
++    SIAddrLib(const AddrClient* pClient);
++    virtual ~SIAddrLib();
++
++    // Hwl interface - defined in AddrLib
++    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
++        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
++        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
++
++    virtual UINT_64 HwlComputeXmaskAddrFromCoord(
++        UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices,
++        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
++        ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const;
++
++    virtual VOID HwlComputeXmaskCoordFromAddr(
++        UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
++        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
++        ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
++
++    virtual ADDR_E_RETURNCODE HwlGetTileIndex(
++        const ADDR_GET_TILEINDEX_INPUT* pIn,
++        ADDR_GET_TILEINDEX_OUTPUT*      pOut) const;
++
++    virtual BOOL_32 HwlComputeMipLevel(
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
++
++    virtual AddrChipFamily HwlConvertChipFamily(
++        UINT_32 uChipFamily, UINT_32 uChipRevision);
++
++    virtual BOOL_32 HwlInitGlobalParams(
++        const ADDR_CREATE_INPUT* pCreateIn);
++
++    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
++        INT_32 index, INT_32 macroModeIndex,
++        ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
++
++    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
++        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
++        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
++
++    virtual UINT_64 HwlComputeHtileBytes(
++        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
++        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const;
++
++    virtual UINT_32 ComputePipeFromCoord(
++        UINT_32 x, UINT_32 y, UINT_32 slice,
++        AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE,
++        ADDR_TILEINFO* pTileInfo) const;
++
++    virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const;
++
++    /// Pre-handler of 3x pitch (96 bit) adjustment
++    virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
++    /// Post-handler of 3x pitch adjustment
++    virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
++
++    /// Dummy function to finalize the inheritance
++    virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
++        UINT_32 pipe, UINT_32 x) const;
++
++    // Sub-hwl interface - defined in EgBasedAddrLib
++    virtual VOID HwlSetupTileInfo(
++        AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
++        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
++        ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
++        AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
++        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
++
++    virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
++        UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
++        UINT_32 baseAlign, UINT_32 pitchAlign,
++        UINT_32 *pPitch, UINT_32 *pHeight) const;
++
++    virtual VOID HwlCheckLastMacroTiledLvl(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
++
++    virtual BOOL_32 HwlTileInfoEqual(
++        const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
++
++    virtual AddrTileMode HwlDegradeThickTileMode(
++        AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
++
++    virtual BOOL_32 HwlOverrideTileMode(
++        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
++        AddrTileMode* pTileMode,
++        AddrTileType* pTileType) const;
++
++    virtual BOOL_32 HwlSanityCheckMacroTiled(
++        ADDR_TILEINFO* pTileInfo) const
++    {
++        return TRUE;
++    }
++
++    virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const;
++
++    virtual UINT_64 HwlGetSizeAdjustmentLinear(
++        AddrTileMode tileMode,
++        UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
++        UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const;
++
++    virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
++        AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
++        UINT_32 bank, UINT_32 pipe,
++        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
++        BOOL_32 ignoreSE,
++        ADDR_TILEINFO* pTileInfo) const;
++
++    virtual UINT_32 HwlPreAdjustBank(
++        UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const;
++
++    virtual INT_32 HwlPostCheckTileIndex(
++        const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
++        INT curIndex = TileIndexInvalid) const;
++
++    virtual VOID   HwlFmaskPreThunkSurfInfo(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
++        const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
++        ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
++        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
++
++    virtual VOID   HwlFmaskPostThunkSurfInfo(
++        const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
++        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
++
++    virtual UINT_32 HwlComputeFmaskBits(
++        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
++        UINT_32* pNumSamples) const;
++
++    virtual BOOL_32 HwlReduceBankWidthHeight(
++        UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
++        UINT_32 bankHeightAlign, UINT_32 pipes,
++        ADDR_TILEINFO* pTileInfo) const
++    {
++        return TRUE;
++    }
++
++    // Protected non-virtual functions
++    VOID ComputeTileCoordFromPipeAndElemIdx(
++        UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile,
++        UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const;
++
++    UINT_32 TileCoordToMaskElementIndex(
++        UINT_32 tx, UINT_32 ty, AddrPipeCfg  pipeConfig,
++        UINT_32 *macroShift, UINT_32 *elemIdxBits) const;
++
++    BOOL_32 DecodeGbRegs(
++        const ADDR_REGISTER_VALUE* pRegValue);
++
++    const ADDR_TILECONFIG* GetTileSetting(
++        UINT_32 index) const;
++
++    static const UINT_32    TileTableSize = 32;
++    ADDR_TILECONFIG         m_tileTable[TileTableSize];
++    UINT_32                 m_noOfEntries;
++
++private:
++
++    UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const;
++
++    VOID ReadGbTileMode(
++        UINT_32 regValue, ADDR_TILECONFIG* pCfg) const;
++    BOOL_32 InitTileSettingTable(
++        const UINT_32 *pSetting, UINT_32 noOfEntries);
++
++    SIChipSettings          m_settings;
++};
++
++#endif
++
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_id.h b/src/gallium/winsys/radeon/amdgpu/amdgpu_id.h
+new file mode 100644
+index 0000000..08a1591
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_id.h
+@@ -0,0 +1,157 @@
++/*
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/**
++ * This file is included by addrlib. It adds GPU family definitions and
++ * macros compatible with addrlib.
++ */
++
++#ifndef AMDGPU_ID_H
++#define AMDGPU_ID_H
++
++#include "pipe/p_config.h"
++
++#if defined(PIPE_ARCH_LITTLE_ENDIAN)
++#define LITTLEENDIAN_CPU
++#elif defined(PIPE_ARCH_BIG_ENDIAN)
++#define BIGENDIAN_CPU
++#endif
++
++enum {
++	FAMILY_UNKNOWN,
++	FAMILY_SI,
++	FAMILY_CI,
++	FAMILY_KV,
++	FAMILY_VI,
++	FAMILY_CZ,
++	FAMILY_PI,
++	FAMILY_LAST,
++};
++
++/* SI specific rev IDs */
++enum {
++	SI_TAHITI_P_A11      = 1,
++	SI_TAHITI_P_A0       = SI_TAHITI_P_A11,      /*A0 is alias of A11*/
++	SI_TAHITI_P_A21      = 5,
++	SI_TAHITI_P_B0       = SI_TAHITI_P_A21,      /*B0 is alias of A21*/
++	SI_TAHITI_P_A22      = 6,
++	SI_TAHITI_P_B1       = SI_TAHITI_P_A22,      /*B1 is alias of A22*/
++
++	SI_PITCAIRN_PM_A11   = 20,
++	SI_PITCAIRN_PM_A0    = SI_PITCAIRN_PM_A11,   /*A0 is alias of A11*/
++	SI_PITCAIRN_PM_A12   = 21,
++	SI_PITCAIRN_PM_A1    = SI_PITCAIRN_PM_A12,   /*A1 is alias of A12*/
++
++	SI_CAPEVERDE_M_A11   = 40,
++	SI_CAPEVERDE_M_A0    = SI_CAPEVERDE_M_A11,   /*A0 is alias of A11*/
++	SI_CAPEVERDE_M_A12   = 41,
++	SI_CAPEVERDE_M_A1    = SI_CAPEVERDE_M_A12,   /*A1 is alias of A12*/
++
++	SI_OLAND_M_A0        = 60,
++
++	SI_HAINAN_V_A0       = 70,
++
++	SI_UNKNOWN           = 0xFF
++};
++
++
++#define ASICREV_IS_TAHITI_P(eChipRev)	\
++	((eChipRev) < SI_PITCAIRN_PM_A11)
++#define ASICREV_IS_PITCAIRN_PM(eChipRev)	\
++	(((eChipRev) >= SI_PITCAIRN_PM_A11) && ((eChipRev) < SI_CAPEVERDE_M_A11))
++#define ASICREV_IS_CAPEVERDE_M(eChipRev)	\
++	(((eChipRev) >= SI_CAPEVERDE_M_A11) && ((eChipRev) < SI_OLAND_M_A0))
++#define ASICREV_IS_OLAND_M(eChipRev)	\
++	(((eChipRev) >= SI_OLAND_M_A0) && ((eChipRev) < SI_HAINAN_V_A0))
++#define ASICREV_IS_HAINAN_V(eChipRev)	\
++	((eChipRev) >= SI_HAINAN_V_A0)
++
++/* CI specific revIDs */
++enum {
++	CI_BONAIRE_M_A0 = 20,
++	CI_BONAIRE_M_A1 = 21,
++
++	CI_HAWAII_P_A0  = 40,
++
++	CI_UNKNOWN      = 0xFF
++};
++
++#define ASICREV_IS_BONAIRE_M(eChipRev)	\
++	(((eChipRev) >= CI_BONAIRE_M_A0) && ((eChipRev) < CI_HAWAII_P_A0))
++#define ASICREV_IS_HAWAII_P(eChipRev)	\
++	((eChipRev) >= CI_HAWAII_P_A0)
++
++/* KV specific rev IDs */
++enum {
++	KV_SPECTRE_A0      = 0x01,       /* KV1 with Spectre GFX core, 8-8-1-2 (CU-Pix-Primitive-RB) */
++	KV_SPOOKY_A0       = 0x41,       /* KV2 with Spooky GFX core, including downgraded from Spectre core, 3-4-1-1 (CU-Pix-Primitive-RB) */
++	KB_KALINDI_A0      = 0x81,       /* KB with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
++	KB_KALINDI_A1      = 0x82,       /* KB with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
++	BV_KALINDI_A2      = 0x85,       /* BV with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
++	ML_GODAVARI_A0     = 0xa1,      /* ML with Godavari GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
++	ML_GODAVARI_A1     = 0xa2,      /* ML with Godavari GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
++	KV_UNKNOWN = 0xFF
++};
++
++#define ASICREV_IS_SPECTRE(eChipRev)	\
++	(((eChipRev) >= KV_SPECTRE_A0) && ((eChipRev) < KV_SPOOKY_A0))         /* identify all versions of SPECTRE and supported features set */
++#define ASICREV_IS_SPOOKY(eChipRev)	\
++	(((eChipRev) >= KV_SPOOKY_A0) && ((eChipRev) < KB_KALINDI_A0))          /* identify all versions of SPOOKY and supported features set */
++#define ASICREV_IS_KALINDI(eChipRev)	\
++	(((eChipRev) >= KB_KALINDI_A0) && ((eChipRev) < KV_UNKNOWN))           /* identify all versions of KALINDI and supported features set */
++
++/* Following macros are subset of ASICREV_IS_KALINDI macro */
++#define ASICREV_IS_KALINDI_BHAVANI(eChipRev)	\
++	(((eChipRev) >= BV_KALINDI_A2) && ((eChipRev) < ML_GODAVARI_A0))   /* identify all versions of BHAVANI and supported features set */
++#define ASICREV_IS_KALINDI_GODAVARI(eChipRev)	\
++	(((eChipRev) >= ML_GODAVARI_A0) && ((eChipRev) < KV_UNKNOWN)) /* identify all versions of GODAVARI and supported features set */
++
++/* VI specific rev IDs */
++enum {
++	VI_ICELAND_M_A0   = 1,
++
++	VI_TONGA_P_A0     = 20,
++	VI_TONGA_P_A1     = 21,
++
++	VI_UNKNOWN        = 0xFF
++};
++
++
++#define ASICREV_IS_ICELAND_M(eChipRev)	\
++	((eChipRev) < VI_TONGA_P_A0)
++#define ASICREV_IS_TONGA_P(eChipRev)	\
++	((eChipRev) >= VI_TONGA_P_A0)
++
++/* CZ specific rev IDs */
++enum {
++	CZ_CARRIZO_A0      = 0x01,
++	CZ_UNKNOWN      = 0xFF
++};
++
++#define ASICREV_IS_CARRIZO(eChipRev) \
++	((eChipRev) >= CZ_CARRIZO_A0)    /* must name the enumerator declared above */
++
++#endif /* AMDGPU_ID_H */
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_surface.c b/src/gallium/winsys/radeon/amdgpu/amdgpu_surface.c
+new file mode 100644
+index 0000000..aa32413
+--- /dev/null
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_surface.c
+@@ -0,0 +1,436 @@
++/*
++ * Copyright © 2011 Red Hat All Rights Reserved.
++ * Copyright © 2014 Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++
++/* Contact:
++ *     Marek Olšák <maraeo@gmail.com>
++ */
++
++#include "amdgpu_winsys.h"
++
++#ifndef NO_ENTRIES
++#define NO_ENTRIES 32
++#endif
++
++#ifndef NO_MACRO_ENTRIES
++#define NO_MACRO_ENTRIES 16
++#endif
++
++#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
++#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
++#endif
++
++/* Validate a surface description: returns 0 when usable, -EINVAL otherwise. */
++static int amdgpu_surface_sanity(const struct radeon_surf *surf)
++{
++   unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
++
++   if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
++      return -EINVAL;
++
++   /* All dimensions, including the array size, must be at least 1. */
++   if (!surf->npix_x || !surf->npix_y || !surf->npix_z ||
++       !surf->array_size)
++      return -EINVAL;
++
++   if (!surf->blk_w || !surf->blk_h || !surf->blk_d) /* block dimensions must be non-zero too */
++      return -EINVAL;
++
++   switch (surf->nsamples) { /* only 1, 2, 4 or 8 samples are accepted */
++   case 1:
++   case 2:
++   case 4:
++   case 8:
++      break;
++   default:
++      return -EINVAL;
++   }
++
++   switch (type) { /* per-type limits on height, depth and array size */
++   case RADEON_SURF_TYPE_1D:
++      if (surf->npix_y > 1)
++         return -EINVAL;
++      /* fall through: 1D must also satisfy the 2D/cubemap limits */
++   case RADEON_SURF_TYPE_2D:
++   case RADEON_SURF_TYPE_CUBEMAP:
++      if (surf->npix_z > 1 || surf->array_size > 1)
++         return -EINVAL;
++      break;
++   case RADEON_SURF_TYPE_3D:
++      if (surf->array_size > 1)
++         return -EINVAL;
++      break;
++   case RADEON_SURF_TYPE_1D_ARRAY:
++      if (surf->npix_y > 1)
++         return -EINVAL;
++      /* fall through: 1D arrays must also satisfy the 2D array limit */
++   case RADEON_SURF_TYPE_2D_ARRAY:
++      if (surf->npix_z > 1)
++         return -EINVAL;
++      break;
++   default:
++      return -EINVAL;
++   }
++   return 0;
++}
++
++static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
++{
++   return malloc(pInput->sizeInBytes);
++}
++
++static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
++{
++   free(pInput->pVirtAddr);
++   return ADDR_OK;
++}
++
++/**
++ * Return the number of banks for the surface (2, 4, 8 or 16), decoded from
++ * bits 7:6 of the macro tile mode selected by min(tile_split, 64 * bpe).
++ */
++static uint32_t cik_num_banks(struct amdgpu_winsys *ws,
++                              struct radeon_surf *surf)
++{
++   unsigned index, tileb;
++
++   tileb = 8 * 8 * surf->bpe; /* 8 * 8 elements of bpe each (presumably bytes per element) */
++   tileb = MIN2(surf->tile_split, tileb);
++
++   for (index = 0; tileb > 64; index++) { /* index = halvings needed to bring tileb down to 64 or less */
++      tileb >>= 1;
++   }
++   assert(index < 16); /* debug-only bound check on the table index */
++
++   return 2 << ((ws->amdinfo.gb_macro_tile_mode[index] >> 6) & 0x3); /* 2-bit field -> 2, 4, 8 or 16 */
++}
++
++ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
++{
++   ADDR_CREATE_INPUT addrCreateInput = {0};
++   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
++   ADDR_REGISTER_VALUE regValue = {0};
++   ADDR_CREATE_FLAGS createFlags = {{0}};
++   ADDR_E_RETURNCODE addrRet;
++
++   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
++   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
++
++   regValue.noOfBanks = ws->amdinfo.mc_arb_ramcfg & 0x3;
++   regValue.gbAddrConfig = ws->amdinfo.gb_addr_cfg;
++   regValue.noOfRanks = (ws->amdinfo.mc_arb_ramcfg & 0x4) >> 2;
++
++   regValue.backendDisables = ws->amdinfo.backend_disable[0];
++   regValue.pTileConfig = ws->amdinfo.gb_tile_mode;
++   regValue.noOfEntries = sizeof(ws->amdinfo.gb_tile_mode) /
++                          sizeof(ws->amdinfo.gb_tile_mode[0]);
++   regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
++   regValue.noOfMacroEntries = sizeof(ws->amdinfo.gb_macro_tile_mode) /
++                               sizeof(ws->amdinfo.gb_macro_tile_mode[0]);
++
++   createFlags.value = 0;
++   createFlags.useTileIndex = 1;
++   createFlags.degradeBaseLevel = 1;
++
++   addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
++   addrCreateInput.chipFamily = ws->family;
++   addrCreateInput.chipRevision = ws->rev_id;
++   addrCreateInput.createFlags = createFlags;
++   addrCreateInput.callbacks.allocSysMem = allocSysMem;
++   addrCreateInput.callbacks.freeSysMem = freeSysMem;
++   addrCreateInput.callbacks.debugPrint = 0;
++   addrCreateInput.regValue = regValue;
++
++   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
++   if (addrRet != ADDR_OK)
++      return NULL;
++
++   return addrCreateOutput.hLib;
++}
++
++static int compute_level(struct amdgpu_winsys *ws,
++                         struct radeon_surf *surf, bool is_stencil,
++                         unsigned level, unsigned type, bool compressed,
++                         ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
++                         ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut)
++{
++   struct radeon_surf_level *surf_level;
++   ADDR_E_RETURNCODE ret;
++
++   AddrSurfInfoIn->mipLevel = level;
++   AddrSurfInfoIn->width = u_minify(surf->npix_x, level);
++   AddrSurfInfoIn->height = u_minify(surf->npix_y, level);
++
++   if (type == RADEON_SURF_TYPE_3D)
++      AddrSurfInfoIn->numSlices = u_minify(surf->npix_z, level);
++   else if (type == RADEON_SURF_TYPE_CUBEMAP)
++      AddrSurfInfoIn->numSlices = 6;
++   else
++      AddrSurfInfoIn->numSlices = surf->array_size;
++
++   if (level > 0) {
++      /* Set the base level pitch. This is needed for calculation
++       * of non-zero levels. */
++      if (is_stencil)
++         AddrSurfInfoIn->basePitch = surf->stencil_level[0].nblk_x;
++      else
++         AddrSurfInfoIn->basePitch = surf->level[0].nblk_x;
++
++      /* Convert blocks to pixels for compressed formats. */
++      if (compressed)
++         AddrSurfInfoIn->basePitch *= surf->blk_w;
++   }
++
++   ret = AddrComputeSurfaceInfo(ws->addrlib,
++                                AddrSurfInfoIn,
++                                AddrSurfInfoOut);
++   if (ret != ADDR_OK) {
++      return ret;
++   }
++
++   surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level];
++   surf_level->offset = align(surf->bo_size, AddrSurfInfoOut->baseAlign);
++   surf_level->slice_size = AddrSurfInfoOut->sliceSize;
++   surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
++   surf_level->npix_x = u_minify(surf->npix_x, level);
++   surf_level->npix_y = u_minify(surf->npix_y, level);
++   surf_level->npix_z = u_minify(surf->npix_z, level);
++   surf_level->nblk_x = AddrSurfInfoOut->pitch;
++   surf_level->nblk_y = AddrSurfInfoOut->height;
++   if (type == RADEON_SURF_TYPE_3D)
++      surf_level->nblk_z = AddrSurfInfoOut->depth;
++   else
++      surf_level->nblk_z = 1;
++
++   switch (AddrSurfInfoOut->tileMode) {
++   case ADDR_TM_LINEAR_GENERAL:
++      surf_level->mode = RADEON_SURF_MODE_LINEAR;
++      break;
++   case ADDR_TM_LINEAR_ALIGNED:
++      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
++      break;
++   case ADDR_TM_1D_TILED_THIN1:
++      surf_level->mode = RADEON_SURF_MODE_1D;
++      break;
++   case ADDR_TM_2D_TILED_THIN1:
++      surf_level->mode = RADEON_SURF_MODE_2D;
++      break;
++   default:
++      assert(0);
++   }
++
++   if (is_stencil)
++      surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
++   else
++      surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
++
++   surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
++   return 0;
++}
++
++static int amdgpu_surface_init(struct radeon_winsys *rws,
++                               struct radeon_surf *surf)
++{
++   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
++   unsigned level, mode, type;
++   bool compressed;
++   ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
++   ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
++   ADDR_TILEINFO AddrTileInfoIn = {0};
++   ADDR_TILEINFO AddrTileInfoOut = {0};
++   int r;
++
++   r = amdgpu_surface_sanity(surf);
++   if (r)
++      return r;
++
++   AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
++   AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
++   AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
++
++   type = RADEON_SURF_GET(surf->flags, TYPE);
++   mode = RADEON_SURF_GET(surf->flags, MODE);
++   compressed = surf->blk_w == 4 && surf->blk_h == 4;
++
++   /* MSAA and FMASK require 2D tiling. */
++   if (surf->nsamples > 1 ||
++       (surf->flags & RADEON_SURF_FMASK))
++      mode = RADEON_SURF_MODE_2D;
++
++   /* DB doesn't support linear layouts. */
++   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
++       mode < RADEON_SURF_MODE_1D)
++      mode = RADEON_SURF_MODE_1D;
++
++   /* Set the requested tiling mode. */
++   switch (mode) {
++   case RADEON_SURF_MODE_LINEAR:
++      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_GENERAL;
++      break;
++   case RADEON_SURF_MODE_LINEAR_ALIGNED:
++      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
++      break;
++   case RADEON_SURF_MODE_1D:
++      AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
++      break;
++   case RADEON_SURF_MODE_2D:
++      AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
++      break;
++   default:
++      assert(0);
++   }
++
++   /* The format must be set correctly for the allocation of compressed
++    * textures to work. In other cases, setting the bpp is sufficient. */
++   if (compressed) {
++      switch (surf->bpe) {
++      case 8:
++         AddrSurfInfoIn.format = ADDR_FMT_BC1;
++         break;
++      case 16:
++         AddrSurfInfoIn.format = ADDR_FMT_BC3;
++         break;
++      default:
++         assert(0);
++      }
++   }
++   else {
++      AddrSurfInfoIn.bpp = surf->bpe * 8;
++   }
++
++   AddrSurfInfoIn.numSamples = surf->nsamples;
++   AddrSurfInfoIn.tileIndex = -1;
++
++   /* Set the micro tile type. */
++   if (surf->flags & RADEON_SURF_SCANOUT)
++      AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
++   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
++      AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
++   else
++      AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
++
++   AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
++   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
++   AddrSurfInfoIn.flags.stencil = (surf->flags & RADEON_SURF_SBUFFER) != 0;
++   AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
++   AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
++   AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
++   AddrSurfInfoIn.flags.degrade4Space = 1;
++
++   /* This disables incorrect calculations (hacks) in addrlib. */
++   AddrSurfInfoIn.flags.noStencil = 1;
++
++   /* Set preferred macrotile parameters. This is usually required
++    * for shared resources. This is for 2D tiling only. */
++   if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
++       surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) {
++      /* If any of these parameters are incorrect, the calculation
++       * will fail. */
++      AddrTileInfoIn.banks = cik_num_banks(ws, surf);
++      AddrTileInfoIn.bankWidth = surf->bankw;
++      AddrTileInfoIn.bankHeight = surf->bankh;
++      AddrTileInfoIn.macroAspectRatio = surf->mtilea;
++      AddrTileInfoIn.tileSplitBytes = surf->tile_split;
++      AddrSurfInfoIn.flags.degrade4Space = 0;
++      AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
++
++      /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
++       * the tile index, because we are expected to know it if
++       * we know the other parameters.
++       *
++       * This is something that can easily be fixed in Addrlib.
++       * For now, just figure it out here.
++       * Note that only 2D_TILE_THIN1 is handled here.
++       */
++      assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
++      assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
++
++      if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
++         AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
++      else
++         AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
++   }
++
++   surf->bo_size = 0;
++
++   /* Calculate texture layout information. */
++   for (level = 0; level <= surf->last_level; level++) {
++      r = compute_level(ws, surf, false, level, type, compressed,
++                        &AddrSurfInfoIn, &AddrSurfInfoOut);
++      if (r)
++         return r;
++
++      if (level == 0) {
++         surf->bo_alignment = AddrSurfInfoOut.baseAlign;
++
++         /* For 2D modes only. */
++         if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
++            surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth;
++            surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight;
++            surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio;
++            surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
++         }
++      }
++   }
++
++   /* Calculate texture layout information for stencil. */
++   if (surf->flags & RADEON_SURF_SBUFFER) {
++      AddrSurfInfoIn.bpp = 8;
++      /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
++      AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
++
++      for (level = 0; level <= surf->last_level; level++) {
++         r = compute_level(ws, surf, true, level, type, compressed,
++                           &AddrSurfInfoIn, &AddrSurfInfoOut);
++         if (r)
++            return r;
++
++         if (level == 0) {
++            surf->stencil_offset = surf->stencil_level[0].offset;
++
++            /* For 2D modes only. */
++            if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
++               surf->stencil_tile_split =
++                     AddrSurfInfoOut.pTileInfo->tileSplitBytes;
++            }
++         }
++      }
++   }
++
++   return 0;
++}
++
++static int amdgpu_surface_best(struct radeon_winsys *rws,
++                               struct radeon_surf *surf)
++{
++   return 0;
++}
++
++void amdgpu_surface_init_functions(struct amdgpu_winsys *ws)
++{
++   ws->base.surface_init = amdgpu_surface_init;
++   ws->base.surface_best = amdgpu_surface_best;
++}
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c
+index 0f3367a..d3992e9 100644
+--- a/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.c
+@@ -39,6 +39,7 @@
+ #include <xf86drm.h>
+ #include <stdio.h>
+ #include <sys/stat.h>
++#include "amdgpu_id.h"
+ 
+ #define CIK_TILE_MODE_COLOR_2D			14
+ 
+@@ -198,6 +199,51 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
+       goto fail;
+    }
+ 
++   /* family and rev_id are for addrlib */
++   switch (ws->info.family) {
++   case CHIP_BONAIRE:
++      ws->family = FAMILY_CI;
++      ws->rev_id = CI_BONAIRE_M_A0;
++      break;
++   case CHIP_KAVERI:
++      ws->family = FAMILY_KV;
++      ws->rev_id = KV_SPECTRE_A0;
++      break;
++   case CHIP_KABINI:
++      ws->family = FAMILY_KV;
++      ws->rev_id = KB_KALINDI_A0;
++      break;
++   case CHIP_HAWAII:
++      ws->family = FAMILY_CI;
++      ws->rev_id = CI_HAWAII_P_A0;
++      break;
++   case CHIP_MULLINS:
++      ws->family = FAMILY_KV;
++      ws->rev_id = ML_GODAVARI_A0;
++      break;
++   case CHIP_TONGA:
++      ws->family = FAMILY_VI;
++      ws->rev_id = VI_TONGA_P_A0;
++      break;
++   case CHIP_ICELAND:
++      ws->family = FAMILY_VI;
++      ws->rev_id = VI_ICELAND_M_A0;
++      break;
++   case CHIP_CARRIZO:
++      ws->family = FAMILY_CZ;
++      ws->rev_id = CZ_CARRIZO_A0;
++      break;
++   default:
++      fprintf(stderr, "amdgpu: Unknown family.\n");
++      goto fail;
++   }
++
++   ws->addrlib = amdgpu_addr_create(ws);
++   if (!ws->addrlib) {
++      fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
++      goto fail;
++   }
++
+    /* Set hardware information. */
+    ws->info.gart_size = gtt.heap_size;
+    ws->info.vram_size = vram.heap_size;
+@@ -232,6 +278,8 @@ fail:
+    if (ws->ctx) {
+       amdgpu_cs_ctx_free(ws->dev, ws->ctx);
+    }
++   if (ws->addrlib)
++      AddrDestroy(ws->addrlib);
+    amdgpu_device_deinitialize(ws->dev);
+    ws->dev = NULL;
+    return FALSE;
+@@ -251,6 +299,7 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
+ 
+    ws->cman->destroy(ws->cman);
+    ws->kman->destroy(ws->kman);
++   AddrDestroy(ws->addrlib);
+ 
+    amdgpu_cs_ctx_free(ws->dev, ws->ctx);
+    amdgpu_device_deinitialize(ws->dev);
+@@ -451,6 +500,7 @@ struct radeon_winsys *
+ 
+    amdgpu_bomgr_init_functions(ws);
+    amdgpu_cs_init_functions(ws);
++   amdgpu_surface_init_functions(ws);
+ 
+    pipe_mutex_init(ws->cs_stack_lock);
+ 
+diff --git a/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h
+index fc27f1c..8755108 100644
+--- a/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h
++++ b/src/gallium/winsys/radeon/amdgpu/amdgpu_winsys.h
+@@ -31,6 +31,7 @@
+ #define AMDGPU_DRM_WINSYS_H
+ 
+ #include "../radeon_winsys.h"
++#include "addrlib/addrinterface.h"
+ #include "os/os_thread.h"
+ #include <amdgpu.h>
+ 
+@@ -67,6 +68,9 @@ struct amdgpu_winsys {
+    struct amdgpu_cs *cs_stack[RING_LAST];
+ 
+    struct amdgpu_gpu_info amdinfo;
++   ADDR_HANDLE addrlib;
++   uint32_t rev_id;
++   unsigned family;
+ };
+ 
+ static INLINE struct amdgpu_winsys *
+@@ -76,5 +80,7 @@ amdgpu_winsys(struct radeon_winsys *base)
+ }
+ 
+ void amdgpu_ws_queue_cs(struct amdgpu_winsys *ws, struct amdgpu_cs *cs);
++void amdgpu_surface_init_functions(struct amdgpu_winsys *ws);
++ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws);
+ 
+ #endif
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0010-radeonsi-fix-DRM-version-checks-for-amdgpu-DRM-3.0.0.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0010-radeonsi-fix-DRM-version-checks-for-amdgpu-DRM-3.0.0.patch
new file mode 100644
index 00000000..16cd660b
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0010-radeonsi-fix-DRM-version-checks-for-amdgpu-DRM-3.0.0.patch
@@ -0,0 +1,137 @@
+From 05c1faed5211f58309d24729667f1af5ad72f954 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:35:27 +0200
+Subject: [PATCH 10/29] radeonsi: fix DRM version checks for amdgpu DRM 3.0.0
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/r600_buffer_common.c | 6 ++++--
+ src/gallium/drivers/radeon/r600_pipe_common.c   | 4 +++-
+ src/gallium/drivers/radeon/r600_texture.c       | 8 +++++---
+ src/gallium/drivers/radeonsi/si_pipe.c          | 4 +++-
+ src/gallium/drivers/radeonsi/si_state.c         | 8 ++++----
+ 5 files changed, 19 insertions(+), 11 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
+index fc5f6c2..ac395fa 100644
+--- a/src/gallium/drivers/radeon/r600_buffer_common.c
++++ b/src/gallium/drivers/radeon/r600_buffer_common.c
+@@ -121,7 +121,8 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
+ 		/* Older kernels didn't always flush the HDP cache before
+ 		 * CS execution
+ 		 */
+-		if (rscreen->info.drm_minor < 40) {
++		if (rscreen->info.drm_major == 2 &&
++		    rscreen->info.drm_minor < 40) {
+ 			res->domains = RADEON_DOMAIN_GTT;
+ 			flags |= RADEON_FLAG_GTT_WC;
+ 			break;
+@@ -147,7 +148,8 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
+ 		 * Write-combined CPU mappings are fine, the kernel ensures all CPU
+ 		 * writes finish before the GPU executes a command stream.
+ 		 */
+-		if (rscreen->info.drm_minor < 40)
++		if (rscreen->info.drm_major == 2 &&
++		    rscreen->info.drm_minor < 40)
+ 			res->domains = RADEON_DOMAIN_GTT;
+ 		else if (res->domains & RADEON_DOMAIN_VRAM)
+ 			flags |= RADEON_FLAG_CPU_ACCESS;
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
+index c6d7918..3b26dea 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.c
++++ b/src/gallium/drivers/radeon/r600_pipe_common.c
+@@ -864,7 +864,9 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
+ 	util_format_s3tc_init();
+ 	pipe_mutex_init(rscreen->aux_context_lock);
+ 
+-	if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
++	if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
++	     rscreen->info.drm_major == 3) &&
++	    (rscreen->debug_flags & DBG_TRACE_CS)) {
+ 		rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
+ 										PIPE_BIND_CUSTOM,
+ 										PIPE_USAGE_STAGING,
+diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
+index dc510c9..1b64507 100644
+--- a/src/gallium/drivers/radeon/r600_texture.c
++++ b/src/gallium/drivers/radeon/r600_texture.c
+@@ -489,7 +489,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
+ 	unsigned num_pipes = rscreen->tiling_info.num_channels;
+ 
+ 	if (rscreen->chip_class <= EVERGREEN &&
+-	    rscreen->info.drm_minor < 26)
++	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
+ 		return 0;
+ 
+ 	/* HW bug on R6xx. */
+@@ -501,7 +501,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
+ 	/* HTILE is broken with 1D tiling on old kernels and CIK. */
+ 	if (rscreen->chip_class >= CIK &&
+ 	    rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
+-	    rscreen->info.drm_minor < 38)
++	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
+ 		return 0;
+ 
+ 	switch (num_pipes) {
+@@ -1262,7 +1262,9 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
+ 
+ 		/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
+ 		if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
+-		    rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
++		    rctx->chip_class >= CIK &&
++		    rctx->screen->info.drm_major == 2 &&
++		    rctx->screen->info.drm_minor < 38) {
+ 			continue;
+ 		}
+ 
+diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
+index e68c30e..91b4d6f 100644
+--- a/src/gallium/drivers/radeonsi/si_pipe.c
++++ b/src/gallium/drivers/radeonsi/si_pipe.c
+@@ -259,7 +259,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
+ 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ 		/* 2D tiling on CIK is supported since DRM 2.35.0 */
+ 		return sscreen->b.chip_class < CIK ||
+-		       sscreen->b.info.drm_minor >= 35;
++		       (sscreen->b.info.drm_major == 2 &&
++			sscreen->b.info.drm_minor >= 35) ||
++		       sscreen->b.info.drm_major == 3;
+ 
+         case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+                 return R600_MAP_BUFFER_ALIGNMENT;
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index 7f0fdd5..eae5e6f 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -1146,7 +1146,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
+ 				       int first_non_void)
+ {
+ 	struct si_screen *sscreen = (struct si_screen*)screen;
+-	bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
++	bool enable_s3tc = (sscreen->b.info.drm_major == 2 &&
++			    sscreen->b.info.drm_minor >= 31) ||
++			   sscreen->b.info.drm_major == 3;
+ 	boolean uniform = TRUE;
+ 	int i;
+ 
+@@ -1595,7 +1597,6 @@ boolean si_is_format_supported(struct pipe_screen *screen,
+                                unsigned sample_count,
+                                unsigned usage)
+ {
+-	struct si_screen *sscreen = (struct si_screen *)screen;
+ 	unsigned retval = 0;
+ 
+ 	if (target >= PIPE_MAX_TEXTURE_TYPES) {
+@@ -1607,8 +1608,7 @@ boolean si_is_format_supported(struct pipe_screen *screen,
+ 		return FALSE;
+ 
+ 	if (sample_count > 1) {
+-		/* 2D tiling on CIK is supported since DRM 2.35.0 */
+-		if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
++		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
+ 			return FALSE;
+ 
+ 		switch (sample_count) {
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0011-radeonsi-add-VI-register-definitions.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0011-radeonsi-add-VI-register-definitions.patch
new file mode 100644
index 00000000..6154ba3b
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0011-radeonsi-add-VI-register-definitions.patch
@@ -0,0 +1,1753 @@
+From c87e3033117dbead0f02c12117a72b6726134a5c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:12:24 +0200
+Subject: [PATCH 11/29] radeonsi: add VI register definitions
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/r600d_common.h |    2 +-
+ src/gallium/drivers/radeonsi/si_state.c   |   12 +-
+ src/gallium/drivers/radeonsi/sid.h        | 1089 +++++++++++++++++++++++++++--
+ 3 files changed, 1043 insertions(+), 60 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h
+index 74c8d87..bef5586 100644
+--- a/src/gallium/drivers/radeon/r600d_common.h
++++ b/src/gallium/drivers/radeon/r600d_common.h
+@@ -177,7 +177,7 @@
+ #define   S_028804_INTERPOLATE_SRC_Z(x)			(((x) & 0x1) << 19)
+ #define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) & 0x1) << 20)
+ #define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) & 0x1) << 21)
+-#define   S_028804_OVERRASTERIZATION_AMOUNT(x)		(((x) & 0x7) << 24)
++#define   S_028804_OVERRASTERIZATION_AMOUNT(x)		(((x) & 0x07) << 24)
+ #define   S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)	(((x) & 0x1) << 27)
+ #define CM_R_028BDC_PA_SC_LINE_CNTL                  0x28bdc
+ #define   S_028BDC_EXPAND_LINE_WIDTH(x)                (((x) & 0x1) << 9)
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index eae5e6f..6a0b093 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -523,12 +523,12 @@ static void si_set_viewport_states(struct pipe_context *ctx,
+ 		return;
+ 
+ 	viewport->viewport = *state;
+-	si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
+-	si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
+-	si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
+-	si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
+-	si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
+-	si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
++	si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE, fui(state->scale[0]));
++	si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET, fui(state->translate[0]));
++	si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE, fui(state->scale[1]));
++	si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET, fui(state->translate[1]));
++	si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE, fui(state->scale[2]));
++	si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET, fui(state->translate[2]));
+ 
+ 	si_pm4_set_state(sctx, viewport, viewport);
+ }
+diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
+index afe011b..d7348a7 100644
+--- a/src/gallium/drivers/radeonsi/sid.h
++++ b/src/gallium/drivers/radeonsi/sid.h
+@@ -206,6 +206,80 @@
+  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+  */
+ 
++
++#define R_008010_GRBM_STATUS                                            0x008010
++#define   S_008010_ME0PIPE0_CMDFIFO_AVAIL(x)                          (((x) & 0x0F) << 0)
++#define   G_008010_ME0PIPE0_CMDFIFO_AVAIL(x)                          (((x) >> 0) & 0x0F)
++#define   C_008010_ME0PIPE0_CMDFIFO_AVAIL                             0xFFFFFFF0
++#define   S_008010_SRBM_RQ_PENDING(x)                                 (((x) & 0x1) << 5)
++#define   G_008010_SRBM_RQ_PENDING(x)                                 (((x) >> 5) & 0x1)
++#define   C_008010_SRBM_RQ_PENDING                                    0xFFFFFFDF
++#define   S_008010_ME0PIPE0_CF_RQ_PENDING(x)                          (((x) & 0x1) << 7)
++#define   G_008010_ME0PIPE0_CF_RQ_PENDING(x)                          (((x) >> 7) & 0x1)
++#define   C_008010_ME0PIPE0_CF_RQ_PENDING                             0xFFFFFF7F
++#define   S_008010_ME0PIPE0_PF_RQ_PENDING(x)                          (((x) & 0x1) << 8)
++#define   G_008010_ME0PIPE0_PF_RQ_PENDING(x)                          (((x) >> 8) & 0x1)
++#define   C_008010_ME0PIPE0_PF_RQ_PENDING                             0xFFFFFEFF
++#define   S_008010_GDS_DMA_RQ_PENDING(x)                              (((x) & 0x1) << 9)
++#define   G_008010_GDS_DMA_RQ_PENDING(x)                              (((x) >> 9) & 0x1)
++#define   C_008010_GDS_DMA_RQ_PENDING                                 0xFFFFFDFF
++#define   S_008010_DB_CLEAN(x)                                        (((x) & 0x1) << 12)
++#define   G_008010_DB_CLEAN(x)                                        (((x) >> 12) & 0x1)
++#define   C_008010_DB_CLEAN                                           0xFFFFEFFF
++#define   S_008010_CB_CLEAN(x)                                        (((x) & 0x1) << 13)
++#define   G_008010_CB_CLEAN(x)                                        (((x) >> 13) & 0x1)
++#define   C_008010_CB_CLEAN                                           0xFFFFDFFF
++#define   S_008010_TA_BUSY(x)                                         (((x) & 0x1) << 14)
++#define   G_008010_TA_BUSY(x)                                         (((x) >> 14) & 0x1)
++#define   C_008010_TA_BUSY                                            0xFFFFBFFF
++#define   S_008010_GDS_BUSY(x)                                        (((x) & 0x1) << 15)
++#define   G_008010_GDS_BUSY(x)                                        (((x) >> 15) & 0x1)
++#define   C_008010_GDS_BUSY                                           0xFFFF7FFF
++#define   S_008010_WD_BUSY_NO_DMA(x)                                  (((x) & 0x1) << 16)
++#define   G_008010_WD_BUSY_NO_DMA(x)                                  (((x) >> 16) & 0x1)
++#define   C_008010_WD_BUSY_NO_DMA                                     0xFFFEFFFF
++#define   S_008010_VGT_BUSY(x)                                        (((x) & 0x1) << 17)
++#define   G_008010_VGT_BUSY(x)                                        (((x) >> 17) & 0x1)
++#define   C_008010_VGT_BUSY                                           0xFFFDFFFF
++#define   S_008010_IA_BUSY_NO_DMA(x)                                  (((x) & 0x1) << 18)
++#define   G_008010_IA_BUSY_NO_DMA(x)                                  (((x) >> 18) & 0x1)
++#define   C_008010_IA_BUSY_NO_DMA                                     0xFFFBFFFF
++#define   S_008010_IA_BUSY(x)                                         (((x) & 0x1) << 19)
++#define   G_008010_IA_BUSY(x)                                         (((x) >> 19) & 0x1)
++#define   C_008010_IA_BUSY                                            0xFFF7FFFF
++#define   S_008010_SX_BUSY(x)                                         (((x) & 0x1) << 20)
++#define   G_008010_SX_BUSY(x)                                         (((x) >> 20) & 0x1)
++#define   C_008010_SX_BUSY                                            0xFFEFFFFF
++#define   S_008010_WD_BUSY(x)                                         (((x) & 0x1) << 21)
++#define   G_008010_WD_BUSY(x)                                         (((x) >> 21) & 0x1)
++#define   C_008010_WD_BUSY                                            0xFFDFFFFF
++#define   S_008010_SPI_BUSY(x)                                        (((x) & 0x1) << 22)
++#define   G_008010_SPI_BUSY(x)                                        (((x) >> 22) & 0x1)
++#define   C_008010_SPI_BUSY                                           0xFFBFFFFF
++#define   S_008010_BCI_BUSY(x)                                        (((x) & 0x1) << 23)
++#define   G_008010_BCI_BUSY(x)                                        (((x) >> 23) & 0x1)
++#define   C_008010_BCI_BUSY                                           0xFF7FFFFF
++#define   S_008010_SC_BUSY(x)                                         (((x) & 0x1) << 24)
++#define   G_008010_SC_BUSY(x)                                         (((x) >> 24) & 0x1)
++#define   C_008010_SC_BUSY                                            0xFEFFFFFF
++#define   S_008010_PA_BUSY(x)                                         (((x) & 0x1) << 25)
++#define   G_008010_PA_BUSY(x)                                         (((x) >> 25) & 0x1)
++#define   C_008010_PA_BUSY                                            0xFDFFFFFF
++#define   S_008010_DB_BUSY(x)                                         (((x) & 0x1) << 26)
++#define   G_008010_DB_BUSY(x)                                         (((x) >> 26) & 0x1)
++#define   C_008010_DB_BUSY                                            0xFBFFFFFF
++#define   S_008010_CP_COHERENCY_BUSY(x)                               (((x) & 0x1) << 28)
++#define   G_008010_CP_COHERENCY_BUSY(x)                               (((x) >> 28) & 0x1)
++#define   C_008010_CP_COHERENCY_BUSY                                  0xEFFFFFFF
++#define   S_008010_CP_BUSY(x)                                         (((x) & 0x1) << 29)
++#define   G_008010_CP_BUSY(x)                                         (((x) >> 29) & 0x1)
++#define   C_008010_CP_BUSY                                            0xDFFFFFFF
++#define   S_008010_CB_BUSY(x)                                         (((x) & 0x1) << 30)
++#define   G_008010_CB_BUSY(x)                                         (((x) >> 30) & 0x1)
++#define   C_008010_CB_BUSY                                            0xBFFFFFFF
++#define   S_008010_GUI_ACTIVE(x)                                      (((x) & 0x1) << 31)
++#define   G_008010_GUI_ACTIVE(x)                                      (((x) >> 31) & 0x1)
++#define   C_008010_GUI_ACTIVE                                         0x7FFFFFFF
+ #define GRBM_GFX_INDEX                                                  0x802C
+ #define         INSTANCE_INDEX(x)                                     ((x) << 0)
+ #define         SH_INDEX(x)                                           ((x) << 8)
+@@ -278,10 +352,18 @@
+ #define R_0085F8_CP_COHER_BASE                                          0x0085F8
+ 
+ /* CIK */
++#define R_0300FC_CP_STRMOUT_CNTL                                        0x0300FC
++#define   S_0300FC_OFFSET_UPDATE_DONE(x)                              (((x) & 0x1) << 0)
++#define   G_0300FC_OFFSET_UPDATE_DONE(x)                              (((x) >> 0) & 0x1)
++#define   C_0300FC_OFFSET_UPDATE_DONE                                 0xFFFFFFFE
+ #define R_0301E4_CP_COHER_BASE_HI                                       0x0301E4
+ #define   S_0301E4_COHER_BASE_HI_256B(x)                              (((x) & 0xFF) << 0)
+ #define   G_0301E4_COHER_BASE_HI_256B(x)                              (((x) >> 0) & 0xFF)
+ #define   C_0301E4_COHER_BASE_HI_256B                                 0xFFFFFF00
++#define R_0301EC_CP_COHER_START_DELAY                                   0x0301EC
++#define   S_0301EC_START_DELAY_COUNT(x)                               (((x) & 0x3F) << 0)
++#define   G_0301EC_START_DELAY_COUNT(x)                               (((x) >> 0) & 0x3F)
++#define   C_0301EC_START_DELAY_COUNT                                  0xFFFFFFC0
+ #define R_0301F0_CP_COHER_CNTL                                          0x0301F0
+ #define   S_0301F0_DEST_BASE_0_ENA(x)                                 (((x) & 0x1) << 0)
+ #define   G_0301F0_DEST_BASE_0_ENA(x)                                 (((x) >> 0) & 0x1)
+@@ -289,6 +371,14 @@
+ #define   S_0301F0_DEST_BASE_1_ENA(x)                                 (((x) & 0x1) << 1)
+ #define   G_0301F0_DEST_BASE_1_ENA(x)                                 (((x) >> 1) & 0x1)
+ #define   C_0301F0_DEST_BASE_1_ENA                                    0xFFFFFFFD
++/* VI */
++#define   S_0301F0_TC_SD_ACTION_ENA(x)                                (((x) & 0x1) << 2)
++#define   G_0301F0_TC_SD_ACTION_ENA(x)                                (((x) >> 2) & 0x1)
++#define   C_0301F0_TC_SD_ACTION_ENA                                   0xFFFFFFFB
++#define   S_0301F0_TC_NC_ACTION_ENA(x)                                (((x) & 0x1) << 3)
++#define   G_0301F0_TC_NC_ACTION_ENA(x)                                (((x) >> 3) & 0x1)
++#define   C_0301F0_TC_NC_ACTION_ENA                                   0xFFFFFFF7
++/*    */
+ #define   S_0301F0_CB0_DEST_BASE_ENA(x)                               (((x) & 0x1) << 6)
+ #define   G_0301F0_CB0_DEST_BASE_ENA(x)                               (((x) >> 6) & 0x1)
+ #define   C_0301F0_CB0_DEST_BASE_ENA                                  0xFFFFFFBF
+@@ -319,7 +409,7 @@
+ #define   S_0301F0_TCL1_VOL_ACTION_ENA(x)                             (((x) & 0x1) << 15)
+ #define   G_0301F0_TCL1_VOL_ACTION_ENA(x)                             (((x) >> 15) & 0x1)
+ #define   C_0301F0_TCL1_VOL_ACTION_ENA                                0xFFFF7FFF
+-#define   S_0301F0_TC_VOL_ACTION_ENA(x)                               (((x) & 0x1) << 16)
++#define   S_0301F0_TC_VOL_ACTION_ENA(x)                               (((x) & 0x1) << 16) /* not on VI */
+ #define   G_0301F0_TC_VOL_ACTION_ENA(x)                               (((x) >> 16) & 0x1)
+ #define   C_0301F0_TC_VOL_ACTION_ENA                                  0xFFFEFFFF
+ #define   S_0301F0_TC_WB_ACTION_ENA(x)                                (((x) & 0x1) << 18)
+@@ -352,8 +442,29 @@
+ #define   S_0301F0_SH_ICACHE_ACTION_ENA(x)                            (((x) & 0x1) << 29)
+ #define   G_0301F0_SH_ICACHE_ACTION_ENA(x)                            (((x) >> 29) & 0x1)
+ #define   C_0301F0_SH_ICACHE_ACTION_ENA                               0xDFFFFFFF
++/* VI */
++#define   S_0301F0_SH_KCACHE_WB_ACTION_ENA(x)                         (((x) & 0x1) << 30)
++#define   G_0301F0_SH_KCACHE_WB_ACTION_ENA(x)                         (((x) >> 30) & 0x1)
++#define   C_0301F0_SH_KCACHE_WB_ACTION_ENA                            0xBFFFFFFF
++#define   S_0301F0_SH_SD_ACTION_ENA(x)                                (((x) & 0x1) << 31)
++#define   G_0301F0_SH_SD_ACTION_ENA(x)                                (((x) >> 31) & 0x1)
++#define   C_0301F0_SH_SD_ACTION_ENA                                   0x7FFFFFFF
++/*    */
+ #define R_0301F4_CP_COHER_SIZE                                          0x0301F4
+ #define R_0301F8_CP_COHER_BASE                                          0x0301F8
++#define R_0301FC_CP_COHER_STATUS                                        0x0301FC
++#define   S_0301FC_MATCHING_GFX_CNTX(x)                               (((x) & 0xFF) << 0)
++#define   G_0301FC_MATCHING_GFX_CNTX(x)                               (((x) >> 0) & 0xFF)
++#define   C_0301FC_MATCHING_GFX_CNTX                                  0xFFFFFF00
++#define   S_0301FC_MEID(x)                                            (((x) & 0x03) << 24)
++#define   G_0301FC_MEID(x)                                            (((x) >> 24) & 0x03)
++#define   C_0301FC_MEID                                               0xFCFFFFFF
++#define   S_0301FC_PHASE1_STATUS(x)                                   (((x) & 0x1) << 30)
++#define   G_0301FC_PHASE1_STATUS(x)                                   (((x) >> 30) & 0x1)
++#define   C_0301FC_PHASE1_STATUS                                      0xBFFFFFFF
++#define   S_0301FC_STATUS(x)                                          (((x) & 0x1) << 31)
++#define   G_0301FC_STATUS(x)                                          (((x) >> 31) & 0x1)
++#define   C_0301FC_STATUS                                             0x7FFFFFFF
+ #define R_030230_CP_COHER_SIZE_HI                                       0x030230
+ #define   S_030230_COHER_SIZE_HI_256B(x)                              (((x) & 0xFF) << 0)
+ #define   G_030230_COHER_SIZE_HI_256B(x)                              (((x) >> 0) & 0xFF)
+@@ -375,10 +486,6 @@
+ #define   C_0088C4_ES_LIMIT                                           0xFFE0FFFF
+ #define R_0088C8_VGT_ESGS_RING_SIZE                                     0x0088C8
+ #define R_0088CC_VGT_GSVS_RING_SIZE                                     0x0088CC
+-/* CIK */
+-#define R_030900_VGT_ESGS_RING_SIZE                                     0x030900
+-#define R_030904_VGT_GSVS_RING_SIZE                                     0x030904
+-/*     */
+ #define R_0088D4_VGT_GS_VERTEX_REUSE                                    0x0088D4
+ #define   S_0088D4_VERT_REUSE(x)                                      (((x) & 0x1F) << 0)
+ #define   G_0088D4_VERT_REUSE(x)                                      (((x) >> 0) & 0x1F)
+@@ -462,6 +569,27 @@
+ #define   G_008B10_CURRENT_COUNT(x)                                   (((x) >> 8) & 0xFF)
+ #define   C_008B10_CURRENT_COUNT                                      0xFFFF00FF
+ /* CIK */
++#define R_030800_GRBM_GFX_INDEX                                         0x030800
++#define   S_030800_INSTANCE_INDEX(x)                                  (((x) & 0xFF) << 0)
++#define   G_030800_INSTANCE_INDEX(x)                                  (((x) >> 0) & 0xFF)
++#define   C_030800_INSTANCE_INDEX                                     0xFFFFFF00
++#define   S_030800_SH_INDEX(x)                                        (((x) & 0xFF) << 8)
++#define   G_030800_SH_INDEX(x)                                        (((x) >> 8) & 0xFF)
++#define   C_030800_SH_INDEX                                           0xFFFF00FF
++#define   S_030800_SE_INDEX(x)                                        (((x) & 0xFF) << 16)
++#define   G_030800_SE_INDEX(x)                                        (((x) >> 16) & 0xFF)
++#define   C_030800_SE_INDEX                                           0xFF00FFFF
++#define   S_030800_SH_BROADCAST_WRITES(x)                             (((x) & 0x1) << 29)
++#define   G_030800_SH_BROADCAST_WRITES(x)                             (((x) >> 29) & 0x1)
++#define   C_030800_SH_BROADCAST_WRITES                                0xDFFFFFFF
++#define   S_030800_INSTANCE_BROADCAST_WRITES(x)                       (((x) & 0x1) << 30)
++#define   G_030800_INSTANCE_BROADCAST_WRITES(x)                       (((x) >> 30) & 0x1)
++#define   C_030800_INSTANCE_BROADCAST_WRITES                          0xBFFFFFFF
++#define   S_030800_SE_BROADCAST_WRITES(x)                             (((x) & 0x1) << 31)
++#define   G_030800_SE_BROADCAST_WRITES(x)                             (((x) >> 31) & 0x1)
++#define   C_030800_SE_BROADCAST_WRITES                                0x7FFFFFFF
++#define R_030900_VGT_ESGS_RING_SIZE                                     0x030900
++#define R_030904_VGT_GSVS_RING_SIZE                                     0x030904
+ #define R_030908_VGT_PRIMITIVE_TYPE                                     0x030908
+ #define   S_030908_PRIM_TYPE(x)                                       (((x) & 0x3F) << 0)
+ #define   G_030908_PRIM_TYPE(x)                                       (((x) >> 0) & 0x3F)
+@@ -530,6 +658,34 @@
+ #define   S_030A04_CURRENT_COUNT(x)                                   (((x) & 0xFF) << 8)
+ #define   G_030A04_CURRENT_COUNT(x)                                   (((x) >> 8) & 0xFF)
+ #define   C_030A04_CURRENT_COUNT                                      0xFFFF00FF
++#define R_030A10_PA_SC_SCREEN_EXTENT_MIN_0                              0x030A10
++#define   S_030A10_X(x)                                               (((x) & 0xFFFF) << 0)
++#define   G_030A10_X(x)                                               (((x) >> 0) & 0xFFFF)
++#define   C_030A10_X                                                  0xFFFF0000
++#define   S_030A10_Y(x)                                               (((x) & 0xFFFF) << 16)
++#define   G_030A10_Y(x)                                               (((x) >> 16) & 0xFFFF)
++#define   C_030A10_Y                                                  0x0000FFFF
++#define R_030A14_PA_SC_SCREEN_EXTENT_MAX_0                              0x030A14
++#define   S_030A14_X(x)                                               (((x) & 0xFFFF) << 0)
++#define   G_030A14_X(x)                                               (((x) >> 0) & 0xFFFF)
++#define   C_030A14_X                                                  0xFFFF0000
++#define   S_030A14_Y(x)                                               (((x) & 0xFFFF) << 16)
++#define   G_030A14_Y(x)                                               (((x) >> 16) & 0xFFFF)
++#define   C_030A14_Y                                                  0x0000FFFF
++#define R_030A18_PA_SC_SCREEN_EXTENT_MIN_1                              0x030A18
++#define   S_030A18_X(x)                                               (((x) & 0xFFFF) << 0)
++#define   G_030A18_X(x)                                               (((x) >> 0) & 0xFFFF)
++#define   C_030A18_X                                                  0xFFFF0000
++#define   S_030A18_Y(x)                                               (((x) & 0xFFFF) << 16)
++#define   G_030A18_Y(x)                                               (((x) >> 16) & 0xFFFF)
++#define   C_030A18_Y                                                  0x0000FFFF
++#define R_030A2C_PA_SC_SCREEN_EXTENT_MAX_1                              0x030A2C
++#define   S_030A2C_X(x)                                               (((x) & 0xFFFF) << 0)
++#define   G_030A2C_X(x)                                               (((x) >> 0) & 0xFFFF)
++#define   C_030A2C_X                                                  0xFFFF0000
++#define   S_030A2C_Y(x)                                               (((x) & 0xFFFF) << 16)
++#define   G_030A2C_Y(x)                                               (((x) >> 16) & 0xFFFF)
++#define   C_030A2C_Y                                                  0x0000FFFF
+ /*     */
+ #define R_008BF0_PA_SC_ENHANCE                                          0x008BF0
+ #define   S_008BF0_ENABLE_PA_SC_OUT_OF_ORDER(x)                       (((x) & 0x1) << 0)
+@@ -608,6 +764,32 @@
+ #define     V_008DFC_SQ_VGPR                                        0x00
+ /*     */
+ #define R_008DFC_SQ_INST                                                0x008DFC
++#define R_030D20_SQC_CACHES                                             0x030D20
++#define   S_030D20_TARGET_INST(x)                                     (((x) & 0x1) << 0)
++#define   G_030D20_TARGET_INST(x)                                     (((x) >> 0) & 0x1)
++#define   C_030D20_TARGET_INST                                        0xFFFFFFFE
++#define   S_030D20_TARGET_DATA(x)                                     (((x) & 0x1) << 1)
++#define   G_030D20_TARGET_DATA(x)                                     (((x) >> 1) & 0x1)
++#define   C_030D20_TARGET_DATA                                        0xFFFFFFFD
++#define   S_030D20_INVALIDATE(x)                                      (((x) & 0x1) << 2)
++#define   G_030D20_INVALIDATE(x)                                      (((x) >> 2) & 0x1)
++#define   C_030D20_INVALIDATE                                         0xFFFFFFFB
++#define   S_030D20_WRITEBACK(x)                                       (((x) & 0x1) << 3)
++#define   G_030D20_WRITEBACK(x)                                       (((x) >> 3) & 0x1)
++#define   C_030D20_WRITEBACK                                          0xFFFFFFF7
++#define   S_030D20_VOL(x)                                             (((x) & 0x1) << 4)
++#define   G_030D20_VOL(x)                                             (((x) >> 4) & 0x1)
++#define   C_030D20_VOL                                                0xFFFFFFEF
++#define   S_030D20_COMPLETE(x)                                        (((x) & 0x1) << 16)
++#define   G_030D20_COMPLETE(x)                                        (((x) >> 16) & 0x1)
++#define   C_030D20_COMPLETE                                           0xFFFEFFFF
++#define R_030D24_SQC_WRITEBACK                                          0x030D24
++#define   S_030D24_DWB(x)                                             (((x) & 0x1) << 0)
++#define   G_030D24_DWB(x)                                             (((x) >> 0) & 0x1)
++#define   C_030D24_DWB                                                0xFFFFFFFE
++#define   S_030D24_DIRTY(x)                                           (((x) & 0x1) << 1)
++#define   G_030D24_DIRTY(x)                                           (((x) >> 1) & 0x1)
++#define   C_030D24_DIRTY                                              0xFFFFFFFD
+ #define R_008DFC_SQ_VOP1                                                0x008DFC
+ #define   S_008DFC_SRC0(x)                                            (((x) & 0x1FF) << 0)
+ #define   G_008DFC_SRC0(x)                                            (((x) >> 0) & 0x1FF)
+@@ -3740,7 +3922,17 @@
+ #define   C_008DFC_ENCODING                                           0x03FFFFFF
+ #define     V_008DFC_SQ_ENC_MUBUF_FIELD                             0x38
+ #endif
++#define R_030E00_TA_CS_BC_BASE_ADDR                                     0x030E00
++#define R_030E04_TA_CS_BC_BASE_ADDR_HI                                  0x030E04
++#define   S_030E04_ADDRESS(x)                                         (((x) & 0xFF) << 0)
++#define   G_030E04_ADDRESS(x)                                         (((x) >> 0) & 0xFF)
++#define   C_030E04_ADDRESS                                            0xFFFFFF00
++#define R_030F00_DB_OCCLUSION_COUNT0_LOW                                0x030F00
+ #define R_008F00_SQ_BUF_RSRC_WORD0                                      0x008F00
++#define R_030F04_DB_OCCLUSION_COUNT0_HI                                 0x030F04
++#define   S_030F04_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
++#define   G_030F04_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
++#define   C_030F04_COUNT_HI                                           0x80000000
+ #define R_008F04_SQ_BUF_RSRC_WORD1                                      0x008F04
+ #define   S_008F04_BASE_ADDRESS_HI(x)                                 (((x) & 0xFFFF) << 0)
+ #define   G_008F04_BASE_ADDRESS_HI(x)                                 (((x) >> 0) & 0xFFFF)
+@@ -3754,7 +3946,12 @@
+ #define   S_008F04_SWIZZLE_ENABLE(x)                                  (((x) & 0x1) << 31)
+ #define   G_008F04_SWIZZLE_ENABLE(x)                                  (((x) >> 31) & 0x1)
+ #define   C_008F04_SWIZZLE_ENABLE                                     0x7FFFFFFF
++#define R_030F08_DB_OCCLUSION_COUNT1_LOW                                0x030F08
+ #define R_008F08_SQ_BUF_RSRC_WORD2                                      0x008F08
++#define R_030F0C_DB_OCCLUSION_COUNT1_HI                                 0x030F0C
++#define   S_030F0C_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
++#define   G_030F0C_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
++#define   C_030F0C_COUNT_HI                                           0x80000000
+ #define R_008F0C_SQ_BUF_RSRC_WORD3                                      0x008F0C
+ #define   S_008F0C_DST_SEL_X(x)                                       (((x) & 0x07) << 0)
+ #define   G_008F0C_DST_SEL_X(x)                                       (((x) >> 0) & 0x07)
+@@ -3862,7 +4059,12 @@
+ #define     V_008F0C_SQ_RSRC_BUF_RSVD_1                             0x01
+ #define     V_008F0C_SQ_RSRC_BUF_RSVD_2                             0x02
+ #define     V_008F0C_SQ_RSRC_BUF_RSVD_3                             0x03
++#define R_030F10_DB_OCCLUSION_COUNT2_LOW                                0x030F10
+ #define R_008F10_SQ_IMG_RSRC_WORD0                                      0x008F10
++#define R_030F14_DB_OCCLUSION_COUNT2_HI                                 0x030F14
++#define   S_030F14_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
++#define   G_030F14_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
++#define   C_030F14_COUNT_HI                                           0x80000000
+ #define R_008F14_SQ_IMG_RSRC_WORD1                                      0x008F14
+ #define   S_008F14_BASE_ADDRESS_HI(x)                                 (((x) & 0xFF) << 0)
+ #define   G_008F14_BASE_ADDRESS_HI(x)                                 (((x) >> 0) & 0xFF)
+@@ -3961,6 +4163,7 @@
+ #define   G_008F14_MTYPE(x)                                           (((x) >> 30) & 0x03)
+ #define   C_008F14_MTYPE                                              0x3FFFFFFF
+ /*     */
++#define R_030F18_DB_OCCLUSION_COUNT3_LOW                                0x030F18
+ #define R_008F18_SQ_IMG_RSRC_WORD2                                      0x008F18
+ #define   S_008F18_WIDTH(x)                                           (((x) & 0x3FFF) << 0)
+ #define   G_008F18_WIDTH(x)                                           (((x) >> 0) & 0x3FFF)
+@@ -3974,6 +4177,10 @@
+ #define   S_008F18_INTERLACED(x)                                      (((x) & 0x1) << 31)
+ #define   G_008F18_INTERLACED(x)                                      (((x) >> 31) & 0x1)
+ #define   C_008F18_INTERLACED                                         0x7FFFFFFF
++#define R_030F1C_DB_OCCLUSION_COUNT3_HI                                 0x030F1C
++#define   S_030F1C_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
++#define   G_030F1C_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
++#define   C_030F1C_COUNT_HI                                           0x80000000
+ #define R_008F1C_SQ_IMG_RSRC_WORD3                                      0x008F1C
+ #define   S_008F1C_DST_SEL_X(x)                                       (((x) & 0x07) << 0)
+ #define   G_008F1C_DST_SEL_X(x)                                       (((x) >> 0) & 0x07)
+@@ -4084,6 +4291,23 @@
+ #define   G_008F28_LOD_HDW_CNT_EN(x)                                  (((x) >> 20) & 0x1)
+ #define   C_008F28_LOD_HDW_CNT_EN                                     0xFFEFFFFF
+ /*     */
++/* VI */
++#define   S_008F28_COMPRESSION_EN(x)                                  (((x) & 0x1) << 21)
++#define   G_008F28_COMPRESSION_EN(x)                                  (((x) >> 21) & 0x1)
++#define   C_008F28_COMPRESSION_EN                                     0xFFDFFFFF
++#define   S_008F28_ALPHA_IS_ON_MSB(x)                                 (((x) & 0x1) << 22)
++#define   G_008F28_ALPHA_IS_ON_MSB(x)                                 (((x) >> 22) & 0x1)
++#define   C_008F28_ALPHA_IS_ON_MSB                                    0xFFBFFFFF
++#define   S_008F28_COLOR_TRANSFORM(x)                                 (((x) & 0x1) << 23)
++#define   G_008F28_COLOR_TRANSFORM(x)                                 (((x) >> 23) & 0x1)
++#define   C_008F28_COLOR_TRANSFORM                                    0xFF7FFFFF
++#define   S_008F28_LOST_ALPHA_BITS(x)                                 (((x) & 0x0F) << 24)
++#define   G_008F28_LOST_ALPHA_BITS(x)                                 (((x) >> 24) & 0x0F)
++#define   C_008F28_LOST_ALPHA_BITS                                    0xF0FFFFFF
++#define   S_008F28_LOST_COLOR_BITS(x)                                 (((x) & 0x0F) << 28)
++#define   G_008F28_LOST_COLOR_BITS(x)                                 (((x) >> 28) & 0x0F)
++#define   C_008F28_LOST_COLOR_BITS                                    0x0FFFFFFF
++/*    */
+ #define R_008F2C_SQ_IMG_RSRC_WORD7                                      0x008F2C
+ #define R_008F30_SQ_IMG_SAMP_WORD0                                      0x008F30
+ #define   S_008F30_CLAMP_X(x)                                         (((x) & 0x07) << 0)
+@@ -4148,6 +4372,11 @@
+ #define   S_008F30_FILTER_MODE(x)                                     (((x) & 0x03) << 29)
+ #define   G_008F30_FILTER_MODE(x)                                     (((x) >> 29) & 0x03)
+ #define   C_008F30_FILTER_MODE                                        0x9FFFFFFF
++/* VI */
++#define   S_008F30_COMPAT_MODE(x)                                     (((x) & 0x1) << 31)
++#define   G_008F30_COMPAT_MODE(x)                                     (((x) >> 31) & 0x1)
++#define   C_008F30_COMPAT_MODE                                        0x7FFFFFFF
++/*    */
+ #define R_008F34_SQ_IMG_SAMP_WORD1                                      0x008F34
+ #define   S_008F34_MIN_LOD(x)                                         (((x) & 0xFFF) << 0)
+ #define   G_008F34_MIN_LOD(x)                                         (((x) >> 0) & 0xFFF)
+@@ -4313,6 +4542,11 @@
+ #define   G_008F44_OFFSET(x)                                          (((x) >> 0) & 0xFFFFFF)
+ #define   C_008F44_OFFSET                                             0xFF000000
+ /*     */
++#define R_030FF8_DB_ZPASS_COUNT_LOW                                     0x030FF8 /* no field macros defined for this offset */
++#define R_030FFC_DB_ZPASS_COUNT_HI                                      0x030FFC
++#define   S_030FFC_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0) /* pack: 31-bit field, bits 30:0 */
++#define   G_030FFC_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF) /* extract bits 30:0 */
++#define   C_030FFC_COUNT_HI                                           0x80000000 /* only bit 31 set, i.e. bits 30:0 clear */
+ #define R_009100_SPI_CONFIG_CNTL                                        0x009100
+ #define   S_009100_GPR_WRITE_PRIORITY(x)                              (((x) & 0x1FFFFF) << 0)
+ #define   G_009100_GPR_WRITE_PRIORITY(x)                              (((x) >> 0) & 0x1FFFFF)
+@@ -4437,6 +4671,34 @@
+ #define   S_009858_MSAA16_Y(x)                                        (((x) & 0x03) << 18)
+ #define   G_009858_MSAA16_Y(x)                                        (((x) >> 18) & 0x03)
+ #define   C_009858_MSAA16_Y                                           0xFFF3FFFF
++#define R_0098F8_GB_ADDR_CONFIG                                         0x0098F8 /* per field below: S_ packs, G_ extracts, C_ is the mask with the field bits clear */
++#define   S_0098F8_NUM_PIPES(x)                                       (((x) & 0x07) << 0) /* bits 2:0 */
++#define   G_0098F8_NUM_PIPES(x)                                       (((x) >> 0) & 0x07)
++#define   C_0098F8_NUM_PIPES                                          0xFFFFFFF8
++#define   S_0098F8_PIPE_INTERLEAVE_SIZE(x)                            (((x) & 0x07) << 4) /* bits 6:4 */
++#define   G_0098F8_PIPE_INTERLEAVE_SIZE(x)                            (((x) >> 4) & 0x07)
++#define   C_0098F8_PIPE_INTERLEAVE_SIZE                               0xFFFFFF8F
++#define   S_0098F8_BANK_INTERLEAVE_SIZE(x)                            (((x) & 0x07) << 8) /* bits 10:8 */
++#define   G_0098F8_BANK_INTERLEAVE_SIZE(x)                            (((x) >> 8) & 0x07)
++#define   C_0098F8_BANK_INTERLEAVE_SIZE                               0xFFFFF8FF
++#define   S_0098F8_NUM_SHADER_ENGINES(x)                              (((x) & 0x03) << 12) /* bits 13:12 */
++#define   G_0098F8_NUM_SHADER_ENGINES(x)                              (((x) >> 12) & 0x03)
++#define   C_0098F8_NUM_SHADER_ENGINES                                 0xFFFFCFFF
++#define   S_0098F8_SHADER_ENGINE_TILE_SIZE(x)                         (((x) & 0x07) << 16) /* bits 18:16 */
++#define   G_0098F8_SHADER_ENGINE_TILE_SIZE(x)                         (((x) >> 16) & 0x07)
++#define   C_0098F8_SHADER_ENGINE_TILE_SIZE                            0xFFF8FFFF
++#define   S_0098F8_NUM_GPUS(x)                                        (((x) & 0x07) << 20) /* bits 22:20 */
++#define   G_0098F8_NUM_GPUS(x)                                        (((x) >> 20) & 0x07)
++#define   C_0098F8_NUM_GPUS                                           0xFF8FFFFF
++#define   S_0098F8_MULTI_GPU_TILE_SIZE(x)                             (((x) & 0x03) << 24) /* bits 25:24 */
++#define   G_0098F8_MULTI_GPU_TILE_SIZE(x)                             (((x) >> 24) & 0x03)
++#define   C_0098F8_MULTI_GPU_TILE_SIZE                                0xFCFFFFFF
++#define   S_0098F8_ROW_SIZE(x)                                        (((x) & 0x03) << 28) /* bits 29:28 */
++#define   G_0098F8_ROW_SIZE(x)                                        (((x) >> 28) & 0x03)
++#define   C_0098F8_ROW_SIZE                                           0xCFFFFFFF
++#define   S_0098F8_NUM_LOWER_PIPES(x)                                 (((x) & 0x1) << 30) /* bit 30 */
++#define   G_0098F8_NUM_LOWER_PIPES(x)                                 (((x) >> 30) & 0x1)
++#define   C_0098F8_NUM_LOWER_PIPES                                    0xBFFFFFFF
+ #define R_009910_GB_TILE_MODE0                                          0x009910
+ #define   S_009910_MICRO_TILE_MODE(x)                                 (((x) & 0x03) << 0)
+ #define   G_009910_MICRO_TILE_MODE(x)                                 (((x) >> 0) & 0x03)
+@@ -4515,6 +4777,83 @@
+ #define     V_009910_ADDR_SURF_4_BANK                               0x01
+ #define     V_009910_ADDR_SURF_8_BANK                               0x02
+ #define     V_009910_ADDR_SURF_16_BANK                              0x03
++#define   S_009910_MICRO_TILE_MODE_NEW(x)                             (((x) & 0x07) << 22) /* pack: 3-bit field, bits 24:22 */
++#define   G_009910_MICRO_TILE_MODE_NEW(x)                             (((x) >> 22) & 0x07) /* extract bits 24:22 */
++#define   C_009910_MICRO_TILE_MODE_NEW                                0xFE3FFFFF /* all bits set except 24:22 */
++#define   S_009910_SAMPLE_SPLIT(x)                                    (((x) & 0x03) << 25) /* pack: 2-bit field, bits 26:25 */
++#define   G_009910_SAMPLE_SPLIT(x)                                    (((x) >> 25) & 0x03) /* extract bits 26:25 */
++#define   C_009910_SAMPLE_SPLIT                                       0xF9FFFFFF /* all bits set except 26:25 */
++#define R_009914_GB_TILE_MODE1                                          0x009914 /* MODE1..MODE31: offsets only, 4 bytes apart, 0x009914..0x00998C */
++#define R_009918_GB_TILE_MODE2                                          0x009918
++#define R_00991C_GB_TILE_MODE3                                          0x00991C
++#define R_009920_GB_TILE_MODE4                                          0x009920
++#define R_009924_GB_TILE_MODE5                                          0x009924
++#define R_009928_GB_TILE_MODE6                                          0x009928
++#define R_00992C_GB_TILE_MODE7                                          0x00992C
++#define R_009930_GB_TILE_MODE8                                          0x009930
++#define R_009934_GB_TILE_MODE9                                          0x009934
++#define R_009938_GB_TILE_MODE10                                         0x009938
++#define R_00993C_GB_TILE_MODE11                                         0x00993C
++#define R_009940_GB_TILE_MODE12                                         0x009940
++#define R_009944_GB_TILE_MODE13                                         0x009944
++#define R_009948_GB_TILE_MODE14                                         0x009948
++#define R_00994C_GB_TILE_MODE15                                         0x00994C
++#define R_009950_GB_TILE_MODE16                                         0x009950
++#define R_009954_GB_TILE_MODE17                                         0x009954
++#define R_009958_GB_TILE_MODE18                                         0x009958
++#define R_00995C_GB_TILE_MODE19                                         0x00995C
++#define R_009960_GB_TILE_MODE20                                         0x009960
++#define R_009964_GB_TILE_MODE21                                         0x009964
++#define R_009968_GB_TILE_MODE22                                         0x009968
++#define R_00996C_GB_TILE_MODE23                                         0x00996C
++#define R_009970_GB_TILE_MODE24                                         0x009970
++#define R_009974_GB_TILE_MODE25                                         0x009974
++#define R_009978_GB_TILE_MODE26                                         0x009978
++#define R_00997C_GB_TILE_MODE27                                         0x00997C
++#define R_009980_GB_TILE_MODE28                                         0x009980
++#define R_009984_GB_TILE_MODE29                                         0x009984
++#define R_009988_GB_TILE_MODE30                                         0x009988
++#define R_00998C_GB_TILE_MODE31                                         0x00998C
++/* CIK */
++#define R_009990_GB_MACROTILE_MODE0                                     0x009990 /* field macros are defined for MODE0 only */
++#define   S_009990_BANK_WIDTH(x)                                      (((x) & 0x03) << 0) /* bits 1:0 */
++#define   G_009990_BANK_WIDTH(x)                                      (((x) >> 0) & 0x03)
++#define   C_009990_BANK_WIDTH                                         0xFFFFFFFC
++#define   S_009990_BANK_HEIGHT(x)                                     (((x) & 0x03) << 2) /* bits 3:2 */
++#define   G_009990_BANK_HEIGHT(x)                                     (((x) >> 2) & 0x03)
++#define   C_009990_BANK_HEIGHT                                        0xFFFFFFF3
++#define   S_009990_MACRO_TILE_ASPECT(x)                               (((x) & 0x03) << 4) /* bits 5:4 */
++#define   G_009990_MACRO_TILE_ASPECT(x)                               (((x) >> 4) & 0x03)
++#define   C_009990_MACRO_TILE_ASPECT                                  0xFFFFFFCF
++#define   S_009990_NUM_BANKS(x)                                       (((x) & 0x03) << 6) /* bits 7:6 */
++#define   G_009990_NUM_BANKS(x)                                       (((x) >> 6) & 0x03)
++#define   C_009990_NUM_BANKS                                          0xFFFFFF3F
++#define R_009994_GB_MACROTILE_MODE1                                     0x009994 /* MODE1..MODE15: offsets only, 4 bytes apart, 0x009994..0x0099CC */
++#define R_009998_GB_MACROTILE_MODE2                                     0x009998
++#define R_00999C_GB_MACROTILE_MODE3                                     0x00999C
++#define R_0099A0_GB_MACROTILE_MODE4                                     0x0099A0
++#define R_0099A4_GB_MACROTILE_MODE5                                     0x0099A4
++#define R_0099A8_GB_MACROTILE_MODE6                                     0x0099A8
++#define R_0099AC_GB_MACROTILE_MODE7                                     0x0099AC
++#define R_0099B0_GB_MACROTILE_MODE8                                     0x0099B0
++#define R_0099B4_GB_MACROTILE_MODE9                                     0x0099B4
++#define R_0099B8_GB_MACROTILE_MODE10                                    0x0099B8
++#define R_0099BC_GB_MACROTILE_MODE11                                    0x0099BC
++#define R_0099C0_GB_MACROTILE_MODE12                                    0x0099C0
++#define R_0099C4_GB_MACROTILE_MODE13                                    0x0099C4
++#define R_0099C8_GB_MACROTILE_MODE14                                    0x0099C8
++#define R_0099CC_GB_MACROTILE_MODE15                                    0x0099CC
++/*     */
++#define R_00B000_SPI_SHADER_TBA_LO_PS                                   0x00B000 /* offset only; no field macros */
++#define R_00B004_SPI_SHADER_TBA_HI_PS                                   0x00B004
++#define   S_00B004_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B004_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B004_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
++#define R_00B008_SPI_SHADER_TMA_LO_PS                                   0x00B008 /* offset only; no field macros */
++#define R_00B00C_SPI_SHADER_TMA_HI_PS                                   0x00B00C
++#define   S_00B00C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B00C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B00C_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
+ /* CIK */
+ #define R_00B01C_SPI_SHADER_PGM_RSRC3_PS                                0x00B01C
+ #define   S_00B01C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
+@@ -4575,6 +4914,9 @@
+ #define   S_00B02C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B02C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B02C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B02C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack: 1-bit field at bit 6 */
++#define   G_00B02C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
++#define   C_00B02C_TRAP_PRESENT                                       0xFFFFFFBF /* all bits set except bit 6 */
+ #define   S_00B02C_WAVE_CNT_EN(x)                                     (((x) & 0x1) << 7)
+ #define   G_00B02C_WAVE_CNT_EN(x)                                     (((x) >> 7) & 0x1)
+ #define   C_00B02C_WAVE_CNT_EN                                        0xFFFFFF7F
+@@ -4584,6 +4926,9 @@
+ #define   S_00B02C_EXCP_EN(x)                                         (((x) & 0x7F) << 16) /* mask is 0x1FF on CIK */
+ #define   G_00B02C_EXCP_EN(x)                                         (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
+ #define   C_00B02C_EXCP_EN                                            0xFF80FFFF /* mask is 0x1FF on CIK */
++#define   S_00B02C_EXCP_EN_CIK(x)                                     (((x) & 0x1FF) << 16) /* pack: 9-bit field, bits 24:16 */
++#define   G_00B02C_EXCP_EN_CIK(x)                                     (((x) >> 16) & 0x1FF) /* extract bits 24:16 */
++#define   C_00B02C_EXCP_EN_CIK                                        0xFE00FFFF /* all bits set except 24:16 */
+ #define R_00B030_SPI_SHADER_USER_DATA_PS_0                              0x00B030
+ #define R_00B034_SPI_SHADER_USER_DATA_PS_1                              0x00B034
+ #define R_00B038_SPI_SHADER_USER_DATA_PS_2                              0x00B038
+@@ -4600,6 +4945,16 @@
+ #define R_00B064_SPI_SHADER_USER_DATA_PS_13                             0x00B064
+ #define R_00B068_SPI_SHADER_USER_DATA_PS_14                             0x00B068
+ #define R_00B06C_SPI_SHADER_USER_DATA_PS_15                             0x00B06C
++#define R_00B100_SPI_SHADER_TBA_LO_VS                                   0x00B100 /* offset only; no field macros */
++#define R_00B104_SPI_SHADER_TBA_HI_VS                                   0x00B104
++#define   S_00B104_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B104_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B104_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
++#define R_00B108_SPI_SHADER_TMA_LO_VS                                   0x00B108 /* offset only; no field macros */
++#define R_00B10C_SPI_SHADER_TMA_HI_VS                                   0x00B10C
++#define   S_00B10C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B10C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B10C_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
+ /* CIK */
+ #define R_00B118_SPI_SHADER_PGM_RSRC3_VS                                0x00B118
+ #define   S_00B118_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
+@@ -4667,6 +5022,9 @@
+ #define   S_00B12C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B12C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B12C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B12C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack: 1-bit field at bit 6 */
++#define   G_00B12C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
++#define   C_00B12C_TRAP_PRESENT                                       0xFFFFFFBF /* all bits set except bit 6 */
+ #define   S_00B12C_OC_LDS_EN(x)                                       (((x) & 0x1) << 7)
+ #define   G_00B12C_OC_LDS_EN(x)                                       (((x) >> 7) & 0x1)
+ #define   C_00B12C_OC_LDS_EN                                          0xFFFFFF7F
+@@ -4688,6 +5046,14 @@
+ #define   S_00B12C_EXCP_EN(x)                                         (((x) & 0x7F) << 13) /* mask is 0x1FF on CIK */
+ #define   G_00B12C_EXCP_EN(x)                                         (((x) >> 13) & 0x7F) /* mask is 0x1FF on CIK */
+ #define   C_00B12C_EXCP_EN                                            0xFFF01FFF /* mask is 0x1FF on CIK */
++#define   S_00B12C_EXCP_EN_CIK(x)                                     (((x) & 0x1FF) << 13) /* pack: 9-bit field, bits 21:13 */
++#define   G_00B12C_EXCP_EN_CIK(x)                                     (((x) >> 13) & 0x1FF) /* extract bits 21:13 */
++#define   C_00B12C_EXCP_EN_CIK                                        0xFFC01FFF /* all bits set except 21:13 */
++/* VI */
++#define   S_00B12C_DISPATCH_DRAW_EN(x)                                (((x) & 0x1) << 24) /* pack: 1-bit field at bit 24 */
++#define   G_00B12C_DISPATCH_DRAW_EN(x)                                (((x) >> 24) & 0x1) /* extract bit 24 */
++#define   C_00B12C_DISPATCH_DRAW_EN                                   0xFEFFFFFF /* all bits set except bit 24 */
++/*    */
+ #define R_00B130_SPI_SHADER_USER_DATA_VS_0                              0x00B130
+ #define R_00B134_SPI_SHADER_USER_DATA_VS_1                              0x00B134
+ #define R_00B138_SPI_SHADER_USER_DATA_VS_2                              0x00B138
+@@ -4704,6 +5070,16 @@
+ #define R_00B164_SPI_SHADER_USER_DATA_VS_13                             0x00B164
+ #define R_00B168_SPI_SHADER_USER_DATA_VS_14                             0x00B168
+ #define R_00B16C_SPI_SHADER_USER_DATA_VS_15                             0x00B16C
++#define R_00B200_SPI_SHADER_TBA_LO_GS                                   0x00B200 /* offset only; no field macros */
++#define R_00B204_SPI_SHADER_TBA_HI_GS                                   0x00B204
++#define   S_00B204_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B204_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B204_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
++#define R_00B208_SPI_SHADER_TMA_LO_GS                                   0x00B208 /* offset only; no field macros */
++#define R_00B20C_SPI_SHADER_TMA_HI_GS                                   0x00B20C
++#define   S_00B20C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B20C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B20C_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
+ /* CIK */
+ #define R_00B21C_SPI_SHADER_PGM_RSRC3_GS                                0x00B21C
+ #define   S_00B21C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
+@@ -4716,6 +5092,11 @@
+ #define   G_00B21C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 22) & 0x0F)
+ #define   C_00B21C_LOCK_LOW_THRESHOLD                                 0xFC3FFFFF
+ /*     */
++/* VI */
++#define   S_00B21C_GROUP_FIFO_DEPTH(x)                                (((unsigned)(x) & 0x3F) << 26) /* pack: 6-bit field, bits 31:26; unsigned so values >= 32 cannot overflow int */
++#define   G_00B21C_GROUP_FIFO_DEPTH(x)                                (((x) >> 26) & 0x3F) /* extract bits 31:26 */
++#define   C_00B21C_GROUP_FIFO_DEPTH                                   0x03FFFFFF /* all bits set except 31:26 */
++/*    */
+ #define R_00B220_SPI_SHADER_PGM_LO_GS                                   0x00B220
+ #define R_00B224_SPI_SHADER_PGM_HI_GS                                   0x00B224
+ #define   S_00B224_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+@@ -4764,10 +5145,41 @@
+ #define   S_00B22C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B22C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B22C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B22C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack: 1-bit field at bit 6 */
++#define   G_00B22C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
++#define   C_00B22C_TRAP_PRESENT                                       0xFFFFFFBF /* all bits set except bit 6 */
+ #define   S_00B22C_EXCP_EN(x)                                         (((x) & 0x7F) << 7) /* mask is 0x1FF on CIK */
+ #define   G_00B22C_EXCP_EN(x)                                         (((x) >> 7) & 0x7F) /* mask is 0x1FF on CIK */
+ #define   C_00B22C_EXCP_EN                                            0xFFFFC07F /* mask is 0x1FF on CIK */
++#define   S_00B22C_EXCP_EN_CIK(x)                                     (((x) & 0x1FF) << 7) /* pack: 9-bit field, bits 15:7 */
++#define   G_00B22C_EXCP_EN_CIK(x)                                     (((x) >> 7) & 0x1FF) /* extract bits 15:7 */
++#define   C_00B22C_EXCP_EN_CIK                                        0xFFFF007F /* all bits set except 15:7 */
+ #define R_00B230_SPI_SHADER_USER_DATA_GS_0                              0x00B230
++#define R_00B234_SPI_SHADER_USER_DATA_GS_1                              0x00B234 /* GS_1..GS_15: offsets only, 4 bytes apart, 0x00B234..0x00B26C */
++#define R_00B238_SPI_SHADER_USER_DATA_GS_2                              0x00B238
++#define R_00B23C_SPI_SHADER_USER_DATA_GS_3                              0x00B23C
++#define R_00B240_SPI_SHADER_USER_DATA_GS_4                              0x00B240
++#define R_00B244_SPI_SHADER_USER_DATA_GS_5                              0x00B244
++#define R_00B248_SPI_SHADER_USER_DATA_GS_6                              0x00B248
++#define R_00B24C_SPI_SHADER_USER_DATA_GS_7                              0x00B24C
++#define R_00B250_SPI_SHADER_USER_DATA_GS_8                              0x00B250
++#define R_00B254_SPI_SHADER_USER_DATA_GS_9                              0x00B254
++#define R_00B258_SPI_SHADER_USER_DATA_GS_10                             0x00B258
++#define R_00B25C_SPI_SHADER_USER_DATA_GS_11                             0x00B25C
++#define R_00B260_SPI_SHADER_USER_DATA_GS_12                             0x00B260
++#define R_00B264_SPI_SHADER_USER_DATA_GS_13                             0x00B264
++#define R_00B268_SPI_SHADER_USER_DATA_GS_14                             0x00B268
++#define R_00B26C_SPI_SHADER_USER_DATA_GS_15                             0x00B26C
++#define R_00B300_SPI_SHADER_TBA_LO_ES                                   0x00B300 /* offset only; no field macros */
++#define R_00B304_SPI_SHADER_TBA_HI_ES                                   0x00B304
++#define   S_00B304_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B304_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B304_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
++#define R_00B308_SPI_SHADER_TMA_LO_ES                                   0x00B308 /* offset only; no field macros */
++#define R_00B30C_SPI_SHADER_TMA_HI_ES                                   0x00B30C
++#define   S_00B30C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B30C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B30C_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
+ /* CIK */
+ #define R_00B31C_SPI_SHADER_PGM_RSRC3_ES                                0x00B31C
+ #define   S_00B31C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
+@@ -4780,6 +5192,11 @@
+ #define   G_00B31C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 22) & 0x0F)
+ #define   C_00B31C_LOCK_LOW_THRESHOLD                                 0xFC3FFFFF
+ /*     */
++/* VI */
++#define   S_00B31C_GROUP_FIFO_DEPTH(x)                                (((unsigned)(x) & 0x3F) << 26) /* pack: 6-bit field, bits 31:26; unsigned so values >= 32 cannot overflow int */
++#define   G_00B31C_GROUP_FIFO_DEPTH(x)                                (((x) >> 26) & 0x3F) /* extract bits 31:26 */
++#define   C_00B31C_GROUP_FIFO_DEPTH                                   0x03FFFFFF /* all bits set except 31:26 */
++/*    */
+ #define R_00B320_SPI_SHADER_PGM_LO_ES                                   0x00B320
+ #define R_00B324_SPI_SHADER_PGM_HI_ES                                   0x00B324
+ #define   S_00B324_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+@@ -4831,6 +5248,9 @@
+ #define   S_00B32C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B32C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B32C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B32C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack: 1-bit field at bit 6 */
++#define   G_00B32C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
++#define   C_00B32C_TRAP_PRESENT                                       0xFFFFFFBF /* all bits set except bit 6 */
+ #define   S_00B32C_OC_LDS_EN(x)                                       (((x) & 0x1) << 7)
+ #define   G_00B32C_OC_LDS_EN(x)                                       (((x) >> 7) & 0x1)
+ #define   C_00B32C_OC_LDS_EN                                          0xFFFFFF7F
+@@ -4841,6 +5261,31 @@
+ #define   G_00B32C_LDS_SIZE(x)                                        (((x) >> 20) & 0x1FF) /* CIK, for on-chip GS */
+ #define   C_00B32C_LDS_SIZE                                           0xE00FFFFF /* CIK, for on-chip GS */
+ #define R_00B330_SPI_SHADER_USER_DATA_ES_0                              0x00B330
++#define R_00B334_SPI_SHADER_USER_DATA_ES_1                              0x00B334 /* ES_1..ES_15: offsets only, 4 bytes apart, 0x00B334..0x00B36C */
++#define R_00B338_SPI_SHADER_USER_DATA_ES_2                              0x00B338
++#define R_00B33C_SPI_SHADER_USER_DATA_ES_3                              0x00B33C
++#define R_00B340_SPI_SHADER_USER_DATA_ES_4                              0x00B340
++#define R_00B344_SPI_SHADER_USER_DATA_ES_5                              0x00B344
++#define R_00B348_SPI_SHADER_USER_DATA_ES_6                              0x00B348
++#define R_00B34C_SPI_SHADER_USER_DATA_ES_7                              0x00B34C
++#define R_00B350_SPI_SHADER_USER_DATA_ES_8                              0x00B350
++#define R_00B354_SPI_SHADER_USER_DATA_ES_9                              0x00B354
++#define R_00B358_SPI_SHADER_USER_DATA_ES_10                             0x00B358
++#define R_00B35C_SPI_SHADER_USER_DATA_ES_11                             0x00B35C
++#define R_00B360_SPI_SHADER_USER_DATA_ES_12                             0x00B360
++#define R_00B364_SPI_SHADER_USER_DATA_ES_13                             0x00B364
++#define R_00B368_SPI_SHADER_USER_DATA_ES_14                             0x00B368
++#define R_00B36C_SPI_SHADER_USER_DATA_ES_15                             0x00B36C
++#define R_00B400_SPI_SHADER_TBA_LO_HS                                   0x00B400 /* offset only; no field macros */
++#define R_00B404_SPI_SHADER_TBA_HI_HS                                   0x00B404
++#define   S_00B404_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B404_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B404_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
++#define R_00B408_SPI_SHADER_TMA_LO_HS                                   0x00B408 /* offset only; no field macros */
++#define R_00B40C_SPI_SHADER_TMA_HI_HS                                   0x00B40C
++#define   S_00B40C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B40C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B40C_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
+ /* CIK */
+ #define R_00B41C_SPI_SHADER_PGM_RSRC3_HS                                0x00B41C
+ #define   S_00B41C_WAVE_LIMIT(x)                                      (((x) & 0x3F) << 0)
+@@ -4850,6 +5295,11 @@
+ #define   G_00B41C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 6) & 0x0F)
+ #define   C_00B41C_LOCK_LOW_THRESHOLD                                 0xFFFFFC3F
+ /*     */
++/* VI */
++#define   S_00B41C_GROUP_FIFO_DEPTH(x)                                (((x) & 0x3F) << 10) /* pack: 6-bit field, bits 15:10 */
++#define   G_00B41C_GROUP_FIFO_DEPTH(x)                                (((x) >> 10) & 0x3F) /* extract bits 15:10 */
++#define   C_00B41C_GROUP_FIFO_DEPTH                                   0xFFFF03FF /* all bits set except 15:10 */
++/*    */
+ #define R_00B420_SPI_SHADER_PGM_LO_HS                                   0x00B420
+ #define R_00B424_SPI_SHADER_PGM_HI_HS                                   0x00B424
+ #define   S_00B424_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+@@ -4895,6 +5345,9 @@
+ #define   S_00B42C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B42C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B42C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B42C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack: 1-bit field at bit 6 */
++#define   G_00B42C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
++#define   C_00B42C_TRAP_PRESENT                                       0xFFFFFFBF /* all bits set except bit 6 */
+ #define   S_00B42C_OC_LDS_EN(x)                                       (((x) & 0x1) << 7)
+ #define   G_00B42C_OC_LDS_EN(x)                                       (((x) >> 7) & 0x1)
+ #define   C_00B42C_OC_LDS_EN                                          0xFFFFFF7F
+@@ -4905,6 +5358,31 @@
+ #define   G_00B42C_EXCP_EN(x)                                         (((x) >> 9) & 0x7F) /* mask is 0x1FF on CIK */
+ #define   C_00B42C_EXCP_EN                                            0xFFFF01FF /* mask is 0x1FF on CIK */
+ #define R_00B430_SPI_SHADER_USER_DATA_HS_0                              0x00B430
++#define R_00B434_SPI_SHADER_USER_DATA_HS_1                              0x00B434 /* HS_1..HS_15: offsets only, 4 bytes apart, 0x00B434..0x00B46C */
++#define R_00B438_SPI_SHADER_USER_DATA_HS_2                              0x00B438
++#define R_00B43C_SPI_SHADER_USER_DATA_HS_3                              0x00B43C
++#define R_00B440_SPI_SHADER_USER_DATA_HS_4                              0x00B440
++#define R_00B444_SPI_SHADER_USER_DATA_HS_5                              0x00B444
++#define R_00B448_SPI_SHADER_USER_DATA_HS_6                              0x00B448
++#define R_00B44C_SPI_SHADER_USER_DATA_HS_7                              0x00B44C
++#define R_00B450_SPI_SHADER_USER_DATA_HS_8                              0x00B450
++#define R_00B454_SPI_SHADER_USER_DATA_HS_9                              0x00B454
++#define R_00B458_SPI_SHADER_USER_DATA_HS_10                             0x00B458
++#define R_00B45C_SPI_SHADER_USER_DATA_HS_11                             0x00B45C
++#define R_00B460_SPI_SHADER_USER_DATA_HS_12                             0x00B460
++#define R_00B464_SPI_SHADER_USER_DATA_HS_13                             0x00B464
++#define R_00B468_SPI_SHADER_USER_DATA_HS_14                             0x00B468
++#define R_00B46C_SPI_SHADER_USER_DATA_HS_15                             0x00B46C
++#define R_00B500_SPI_SHADER_TBA_LO_LS                                   0x00B500 /* offset only; no field macros */
++#define R_00B504_SPI_SHADER_TBA_HI_LS                                   0x00B504
++#define   S_00B504_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B504_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B504_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
++#define R_00B508_SPI_SHADER_TMA_LO_LS                                   0x00B508 /* offset only; no field macros */
++#define R_00B50C_SPI_SHADER_TMA_HI_LS                                   0x00B50C
++#define   S_00B50C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack: 8-bit field, bits 7:0 */
++#define   G_00B50C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
++#define   C_00B50C_MEM_BASE                                           0xFFFFFF00 /* all bits set except 7:0 */
+ /* CIK */
+ #define R_00B51C_SPI_SHADER_PGM_RSRC3_LS                                0x00B51C
+ #define   S_00B51C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
+@@ -4917,6 +5395,11 @@
+ #define   G_00B51C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 22) & 0x0F)
+ #define   C_00B51C_LOCK_LOW_THRESHOLD                                 0xFC3FFFFF
+ /*     */
++/* VI */
++#define   S_00B51C_GROUP_FIFO_DEPTH(x)                                (((unsigned)(x) & 0x3F) << 26) /* pack: 6-bit field, bits 31:26; unsigned so values >= 32 cannot overflow int */
++#define   G_00B51C_GROUP_FIFO_DEPTH(x)                                (((x) >> 26) & 0x3F) /* extract bits 31:26 */
++#define   C_00B51C_GROUP_FIFO_DEPTH                                   0x03FFFFFF /* all bits set except 31:26 */
++/*    */
+ #define R_00B520_SPI_SHADER_PGM_LO_LS                                   0x00B520
+ #define R_00B524_SPI_SHADER_PGM_HI_LS                                   0x00B524
+ #define   S_00B524_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+@@ -4965,6 +5448,9 @@
+ #define   S_00B52C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B52C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B52C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B52C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack: 1-bit field at bit 6 */
++#define   G_00B52C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
++#define   C_00B52C_TRAP_PRESENT                                       0xFFFFFFBF /* all bits set except bit 6 */
+ #define   S_00B52C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 7)
+ #define   G_00B52C_LDS_SIZE(x)                                        (((x) >> 7) & 0x1FF)
+ #define   C_00B52C_LDS_SIZE                                           0xFFFF007F
+@@ -4972,6 +5458,21 @@
+ #define   G_00B52C_EXCP_EN(x)                                         (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
+ #define   C_00B52C_EXCP_EN                                            0xFF80FFFF /* mask is 0x1FF on CIK */
+ #define R_00B530_SPI_SHADER_USER_DATA_LS_0                              0x00B530
++#define R_00B534_SPI_SHADER_USER_DATA_LS_1                              0x00B534 /* LS_1..LS_15: offsets only, 4 bytes apart, 0x00B534..0x00B56C */
++#define R_00B538_SPI_SHADER_USER_DATA_LS_2                              0x00B538
++#define R_00B53C_SPI_SHADER_USER_DATA_LS_3                              0x00B53C
++#define R_00B540_SPI_SHADER_USER_DATA_LS_4                              0x00B540
++#define R_00B544_SPI_SHADER_USER_DATA_LS_5                              0x00B544
++#define R_00B548_SPI_SHADER_USER_DATA_LS_6                              0x00B548
++#define R_00B54C_SPI_SHADER_USER_DATA_LS_7                              0x00B54C
++#define R_00B550_SPI_SHADER_USER_DATA_LS_8                              0x00B550
++#define R_00B554_SPI_SHADER_USER_DATA_LS_9                              0x00B554
++#define R_00B558_SPI_SHADER_USER_DATA_LS_10                             0x00B558
++#define R_00B55C_SPI_SHADER_USER_DATA_LS_11                             0x00B55C
++#define R_00B560_SPI_SHADER_USER_DATA_LS_12                             0x00B560
++#define R_00B564_SPI_SHADER_USER_DATA_LS_13                             0x00B564
++#define R_00B568_SPI_SHADER_USER_DATA_LS_14                             0x00B568
++#define R_00B56C_SPI_SHADER_USER_DATA_LS_15                             0x00B56C
+ #define R_00B800_COMPUTE_DISPATCH_INITIATOR                             0x00B800
+ #define   S_00B800_COMPUTE_SHADER_EN(x)                               (((x) & 0x1) << 0)
+ #define   G_00B800_COMPUTE_SHADER_EN(x)                               (((x) >> 0) & 0x1)
+@@ -5042,6 +5543,16 @@
+ #define   S_00B82C_MAX_WAVE_ID(x)                                     (((x) & 0xFFF) << 0)
+ #define   G_00B82C_MAX_WAVE_ID(x)                                     (((x) >> 0) & 0xFFF)
+ #define   C_00B82C_MAX_WAVE_ID                                        0xFFFFF000
++/* CIK */
++#define R_00B828_COMPUTE_PIPELINESTAT_ENABLE                            0x00B828
++#define   S_00B828_PIPELINESTAT_ENABLE(x)                             (((x) & 0x1) << 0)
++#define   G_00B828_PIPELINESTAT_ENABLE(x)                             (((x) >> 0) & 0x1)
++#define   C_00B828_PIPELINESTAT_ENABLE                                0xFFFFFFFE
++#define R_00B82C_COMPUTE_PERFCOUNT_ENABLE                               0x00B82C
++#define   S_00B82C_PERFCOUNT_ENABLE(x)                                (((x) & 0x1) << 0)
++#define   G_00B82C_PERFCOUNT_ENABLE(x)                                (((x) >> 0) & 0x1)
++#define   C_00B82C_PERFCOUNT_ENABLE                                   0xFFFFFFFE
++/*     */
+ #define R_00B830_COMPUTE_PGM_LO                                         0x00B830
+ #define R_00B834_COMPUTE_PGM_HI                                         0x00B834
+ #define   S_00B834_DATA(x)                                            (((x) & 0xFF) << 0)
+@@ -5052,6 +5563,16 @@
+ #define   G_00B834_INST_ATC(x)                                        (((x) >> 8) & 0x1)
+ #define   C_00B834_INST_ATC                                           0xFFFFFEFF
+ /*     */
++#define R_00B838_COMPUTE_TBA_LO                                         0x00B838
++#define R_00B83C_COMPUTE_TBA_HI                                         0x00B83C
++#define   S_00B83C_DATA(x)                                            (((x) & 0xFF) << 0)
++#define   G_00B83C_DATA(x)                                            (((x) >> 0) & 0xFF)
++#define   C_00B83C_DATA                                               0xFFFFFF00
++#define R_00B840_COMPUTE_TMA_LO                                         0x00B840
++#define R_00B844_COMPUTE_TMA_HI                                         0x00B844
++#define   S_00B844_DATA(x)                                            (((x) & 0xFF) << 0)
++#define   G_00B844_DATA(x)                                            (((x) >> 0) & 0xFF)
++#define   C_00B844_DATA                                               0xFFFFFF00
+ #define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
+ #define   S_00B848_VGPRS(x)                                           (((x) & 0x3F) << 0)
+ #define   G_00B848_VGPRS(x)                                           (((x) >> 0) & 0x3F)
+@@ -5092,6 +5613,9 @@
+ #define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+ #define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+ #define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
++#define   S_00B84C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6)
++#define   G_00B84C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1)
++#define   C_00B84C_TRAP_PRESENT                                       0xFFFFFFBF
+ #define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
+ #define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
+ #define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
+@@ -5118,6 +5642,10 @@
+ #define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
+ #define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
+ #define   C_00B84C_EXCP_EN                                            0x80FFFFFF
++#define R_00B850_COMPUTE_VMID                                           0x00B850
++#define   S_00B850_DATA(x)                                            (((x) & 0x0F) << 0)
++#define   G_00B850_DATA(x)                                            (((x) >> 0) & 0x0F)
++#define   C_00B850_DATA                                               0xFFFFFFF0
+ #define R_00B854_COMPUTE_RESOURCE_LIMITS                                0x00B854
+ #define   S_00B854_WAVES_PER_SH(x)                                    (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
+ #define   G_00B854_WAVES_PER_SH(x)                                    (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
+@@ -5160,7 +5688,84 @@
+ #define   S_00B860_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12)
+ #define   G_00B860_WAVESIZE(x)                                        (((x) >> 12) & 0x1FFF)
+ #define   C_00B860_WAVESIZE                                           0xFE000FFF
++/* CIK */
++#define R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2                         0x00B864
++#define   S_00B864_SH0_CU_EN(x)                                       (((x) & 0xFFFF) << 0)
++#define   G_00B864_SH0_CU_EN(x)                                       (((x) >> 0) & 0xFFFF)
++#define   C_00B864_SH0_CU_EN                                          0xFFFF0000
++#define   S_00B864_SH1_CU_EN(x)                                       (((x) & 0xFFFF) << 16)
++#define   G_00B864_SH1_CU_EN(x)                                       (((x) >> 16) & 0xFFFF)
++#define   C_00B864_SH1_CU_EN                                          0x0000FFFF
++#define R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3                         0x00B868
++#define   S_00B868_SH0_CU_EN(x)                                       (((x) & 0xFFFF) << 0)
++#define   G_00B868_SH0_CU_EN(x)                                       (((x) >> 0) & 0xFFFF)
++#define   C_00B868_SH0_CU_EN                                          0xFFFF0000
++#define   S_00B868_SH1_CU_EN(x)                                       (((x) & 0xFFFF) << 16)
++#define   G_00B868_SH1_CU_EN(x)                                       (((x) >> 16) & 0xFFFF)
++#define   C_00B868_SH1_CU_EN                                          0x0000FFFF
++#define R_00B86C_COMPUTE_RESTART_X                                      0x00B86C
++#define R_00B870_COMPUTE_RESTART_Y                                      0x00B870
++#define R_00B874_COMPUTE_RESTART_Z                                      0x00B874
++#define R_00B87C_COMPUTE_MISC_RESERVED                                  0x00B87C
++#define   S_00B87C_SEND_SEID(x)                                       (((x) & 0x03) << 0)
++#define   G_00B87C_SEND_SEID(x)                                       (((x) >> 0) & 0x03)
++#define   C_00B87C_SEND_SEID                                          0xFFFFFFFC
++#define   S_00B87C_RESERVED2(x)                                       (((x) & 0x1) << 2)
++#define   G_00B87C_RESERVED2(x)                                       (((x) >> 2) & 0x1)
++#define   C_00B87C_RESERVED2                                          0xFFFFFFFB
++#define   S_00B87C_RESERVED3(x)                                       (((x) & 0x1) << 3)
++#define   G_00B87C_RESERVED3(x)                                       (((x) >> 3) & 0x1)
++#define   C_00B87C_RESERVED3                                          0xFFFFFFF7
++#define   S_00B87C_RESERVED4(x)                                       (((x) & 0x1) << 4)
++#define   G_00B87C_RESERVED4(x)                                       (((x) >> 4) & 0x1)
++#define   C_00B87C_RESERVED4                                          0xFFFFFFEF
++/* VI */
++#define   S_00B87C_WAVE_ID_BASE(x)                                    (((x) & 0xFFF) << 5)
++#define   G_00B87C_WAVE_ID_BASE(x)                                    (((x) >> 5) & 0xFFF)
++#define   C_00B87C_WAVE_ID_BASE                                       0xFFFE001F
++#define R_00B880_COMPUTE_DISPATCH_ID                                    0x00B880
++#define R_00B884_COMPUTE_THREADGROUP_ID                                 0x00B884
++#define R_00B888_COMPUTE_RELAUNCH                                       0x00B888
++#define   S_00B888_PAYLOAD(x)                                         (((x) & 0x3FFFFFFF) << 0)
++#define   G_00B888_PAYLOAD(x)                                         (((x) >> 0) & 0x3FFFFFFF)
++#define   C_00B888_PAYLOAD                                            0xC0000000
++#define   S_00B888_IS_EVENT(x)                                        (((x) & 0x1) << 30)
++#define   G_00B888_IS_EVENT(x)                                        (((x) >> 30) & 0x1)
++#define   C_00B888_IS_EVENT                                           0xBFFFFFFF
++#define   S_00B888_IS_STATE(x)                                        (((x) & 0x1) << 31)
++#define   G_00B888_IS_STATE(x)                                        (((x) >> 31) & 0x1)
++#define   C_00B888_IS_STATE                                           0x7FFFFFFF
++#define R_00B88C_COMPUTE_WAVE_RESTORE_ADDR_LO                           0x00B88C
++#define R_00B890_COMPUTE_WAVE_RESTORE_ADDR_HI                           0x00B890
++#define   S_00B890_ADDR(x)                                            (((x) & 0xFFFF) << 0)
++#define   G_00B890_ADDR(x)                                            (((x) >> 0) & 0xFFFF)
++#define   C_00B890_ADDR                                               0xFFFF0000
++#define R_00B894_COMPUTE_WAVE_RESTORE_CONTROL                           0x00B894
++#define   S_00B894_ATC(x)                                             (((x) & 0x1) << 0)
++#define   G_00B894_ATC(x)                                             (((x) >> 0) & 0x1)
++#define   C_00B894_ATC                                                0xFFFFFFFE
++#define   S_00B894_MTYPE(x)                                           (((x) & 0x03) << 1)
++#define   G_00B894_MTYPE(x)                                           (((x) >> 1) & 0x03)
++#define   C_00B894_MTYPE                                              0xFFFFFFF9
++/*    */
++/*     */
+ #define R_00B900_COMPUTE_USER_DATA_0                                    0x00B900
++#define R_00B904_COMPUTE_USER_DATA_1                                    0x00B904
++#define R_00B908_COMPUTE_USER_DATA_2                                    0x00B908
++#define R_00B90C_COMPUTE_USER_DATA_3                                    0x00B90C
++#define R_00B910_COMPUTE_USER_DATA_4                                    0x00B910
++#define R_00B914_COMPUTE_USER_DATA_5                                    0x00B914
++#define R_00B918_COMPUTE_USER_DATA_6                                    0x00B918
++#define R_00B91C_COMPUTE_USER_DATA_7                                    0x00B91C
++#define R_00B920_COMPUTE_USER_DATA_8                                    0x00B920
++#define R_00B924_COMPUTE_USER_DATA_9                                    0x00B924
++#define R_00B928_COMPUTE_USER_DATA_10                                   0x00B928
++#define R_00B92C_COMPUTE_USER_DATA_11                                   0x00B92C
++#define R_00B930_COMPUTE_USER_DATA_12                                   0x00B930
++#define R_00B934_COMPUTE_USER_DATA_13                                   0x00B934
++#define R_00B938_COMPUTE_USER_DATA_14                                   0x00B938
++#define R_00B93C_COMPUTE_USER_DATA_15                                   0x00B93C
++#define R_00B9FC_COMPUTE_NOWHERE                                        0x00B9FC
+ #define R_028000_DB_RENDER_CONTROL                                      0x028000
+ #define   S_028000_DEPTH_CLEAR_ENABLE(x)                              (((x) & 0x1) << 0)
+ #define   G_028000_DEPTH_CLEAR_ENABLE(x)                              (((x) >> 0) & 0x1)
+@@ -5189,6 +5794,11 @@
+ #define   S_028000_COPY_SAMPLE(x)                                     (((x) & 0x0F) << 8)
+ #define   G_028000_COPY_SAMPLE(x)                                     (((x) >> 8) & 0x0F)
+ #define   C_028000_COPY_SAMPLE                                        0xFFFFF0FF
++/* VI */
++#define   S_028000_DECOMPRESS_ENABLE(x)                               (((x) & 0x1) << 12)
++#define   G_028000_DECOMPRESS_ENABLE(x)                               (((x) >> 12) & 0x1)
++#define   C_028000_DECOMPRESS_ENABLE                                  0xFFFFEFFF
++/*    */
+ #define R_028004_DB_COUNT_CONTROL                                       0x028004
+ #define   S_028004_ZPASS_INCREMENT_DISABLE(x)                         (((x) & 0x1) << 0)
+ #define   G_028004_ZPASS_INCREMENT_DISABLE(x)                         (((x) >> 0) & 0x1)
+@@ -5467,9 +6077,6 @@
+ #define   S_028040_NUM_SAMPLES(x)                                     (((x) & 0x03) << 2)
+ #define   G_028040_NUM_SAMPLES(x)                                     (((x) >> 2) & 0x03)
+ #define   C_028040_NUM_SAMPLES                                        0xFFFFFFF3
+-#define   S_028040_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK */
+-#define   G_028040_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
+-#define   C_028040_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
+ /* CIK */
+ #define   S_028040_TILE_SPLIT(x)                                      (((x) & 0x07) << 13)
+ #define   G_028040_TILE_SPLIT(x)                                      (((x) >> 13) & 0x07)
+@@ -5482,6 +6089,14 @@
+ #define     V_028040_ADDR_SURF_TILE_SPLIT_2KB                       0x05
+ #define     V_028040_ADDR_SURF_TILE_SPLIT_4KB                       0x06
+ /*     */
++#define   S_028040_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK */
++#define   G_028040_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
++#define   C_028040_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
++/* VI */
++#define   S_028040_DECOMPRESS_ON_N_ZPLANES(x)                         (((x) & 0x0F) << 23)
++#define   G_028040_DECOMPRESS_ON_N_ZPLANES(x)                         (((x) >> 23) & 0x0F)
++#define   C_028040_DECOMPRESS_ON_N_ZPLANES                            0xF87FFFFF
++/*    */
+ #define   S_028040_ALLOW_EXPCLEAR(x)                                  (((x) & 0x1) << 27)
+ #define   G_028040_ALLOW_EXPCLEAR(x)                                  (((x) >> 27) & 0x1)
+ #define   C_028040_ALLOW_EXPCLEAR                                     0xF7FFFFFF
+@@ -5491,6 +6106,11 @@
+ #define   S_028040_TILE_SURFACE_ENABLE(x)                             (((x) & 0x1) << 29)
+ #define   G_028040_TILE_SURFACE_ENABLE(x)                             (((x) >> 29) & 0x1)
+ #define   C_028040_TILE_SURFACE_ENABLE                                0xDFFFFFFF
++/* VI */
++#define   S_028040_CLEAR_DISALLOWED(x)                                (((x) & 0x1) << 30)
++#define   G_028040_CLEAR_DISALLOWED(x)                                (((x) >> 30) & 0x1)
++#define   C_028040_CLEAR_DISALLOWED                                   0xBFFFFFFF
++/*    */
+ #define   S_028040_ZRANGE_PRECISION(x)                                (((x) & 0x1) << 31)
+ #define   G_028040_ZRANGE_PRECISION(x)                                (((x) >> 31) & 0x1)
+ #define   C_028040_ZRANGE_PRECISION                                   0x7FFFFFFF
+@@ -5500,9 +6120,6 @@
+ #define   C_028044_FORMAT                                             0xFFFFFFFE
+ #define     V_028044_STENCIL_INVALID                                0x00
+ #define     V_028044_STENCIL_8                                      0x01
+-#define   S_028044_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK */
+-#define   G_028044_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
+-#define   C_028044_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
+ /* CIK */
+ #define   S_028044_TILE_SPLIT(x)                                      (((x) & 0x07) << 13)
+ #define   G_028044_TILE_SPLIT(x)                                      (((x) >> 13) & 0x07)
+@@ -5515,12 +6132,20 @@
+ #define     V_028044_ADDR_SURF_TILE_SPLIT_2KB                       0x05
+ #define     V_028044_ADDR_SURF_TILE_SPLIT_4KB                       0x06
+ /*     */
++#define   S_028044_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK */
++#define   G_028044_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
++#define   C_028044_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
+ #define   S_028044_ALLOW_EXPCLEAR(x)                                  (((x) & 0x1) << 27)
+ #define   G_028044_ALLOW_EXPCLEAR(x)                                  (((x) >> 27) & 0x1)
+ #define   C_028044_ALLOW_EXPCLEAR                                     0xF7FFFFFF
+ #define   S_028044_TILE_STENCIL_DISABLE(x)                            (((x) & 0x1) << 29)
+ #define   G_028044_TILE_STENCIL_DISABLE(x)                            (((x) >> 29) & 0x1)
+ #define   C_028044_TILE_STENCIL_DISABLE                               0xDFFFFFFF
++/* VI */
++#define   S_028044_CLEAR_DISALLOWED(x)                                (((x) & 0x1) << 30)
++#define   G_028044_CLEAR_DISALLOWED(x)                                (((x) >> 30) & 0x1)
++#define   C_028044_CLEAR_DISALLOWED                                   0xBFFFFFFF
++/*    */
+ #define R_028048_DB_Z_READ_BASE                                         0x028048
+ #define R_02804C_DB_STENCIL_READ_BASE                                   0x02804C
+ #define R_028050_DB_Z_WRITE_BASE                                        0x028050
+@@ -5542,7 +6167,13 @@
+ #define   S_028084_ADDRESS(x)                                         (((x) & 0xFF) << 0)
+ #define   G_028084_ADDRESS(x)                                         (((x) >> 0) & 0xFF)
+ #define   C_028084_ADDRESS                                            0xFFFFFF00
+-/* */
++#define R_0281E8_COHER_DEST_BASE_HI_0                                   0x0281E8
++#define R_0281EC_COHER_DEST_BASE_HI_1                                   0x0281EC
++#define R_0281F0_COHER_DEST_BASE_HI_2                                   0x0281F0
++#define R_0281F4_COHER_DEST_BASE_HI_3                                   0x0281F4
++/*     */
++#define R_0281F8_COHER_DEST_BASE_2                                      0x0281F8
++#define R_0281FC_COHER_DEST_BASE_3                                      0x0281FC
+ #define R_028200_PA_SC_WINDOW_OFFSET                                    0x028200
+ #define   S_028200_WINDOW_X_OFFSET(x)                                 (((x) & 0xFFFF) << 0)
+ #define   G_028200_WINDOW_X_OFFSET(x)                                 (((x) >> 0) & 0xFFFF)
+@@ -5687,6 +6318,8 @@
+ #define   S_028244_BR_Y(x)                                            (((x) & 0x7FFF) << 16)
+ #define   G_028244_BR_Y(x)                                            (((x) >> 16) & 0x7FFF)
+ #define   C_028244_BR_Y                                               0x8000FFFF
++#define R_028248_COHER_DEST_BASE_0                                      0x028248
++#define R_02824C_COHER_DEST_BASE_1                                      0x02824C
+ #define R_028250_PA_SC_VPORT_SCISSOR_0_TL                               0x028250
+ #define   S_028250_TL_X(x)                                            (((x) & 0x7FFF) << 0)
+ #define   G_028250_TL_X(x)                                            (((x) >> 0) & 0x7FFF)
+@@ -5704,8 +6337,68 @@
+ #define   S_028254_BR_Y(x)                                            (((x) & 0x7FFF) << 16)
+ #define   G_028254_BR_Y(x)                                            (((x) >> 16) & 0x7FFF)
+ #define   C_028254_BR_Y                                               0x8000FFFF
++#define R_028258_PA_SC_VPORT_SCISSOR_1_TL                               0x028258
++#define R_02825C_PA_SC_VPORT_SCISSOR_1_BR                               0x02825C
++#define R_028260_PA_SC_VPORT_SCISSOR_2_TL                               0x028260
++#define R_028264_PA_SC_VPORT_SCISSOR_2_BR                               0x028264
++#define R_028268_PA_SC_VPORT_SCISSOR_3_TL                               0x028268
++#define R_02826C_PA_SC_VPORT_SCISSOR_3_BR                               0x02826C
++#define R_028270_PA_SC_VPORT_SCISSOR_4_TL                               0x028270
++#define R_028274_PA_SC_VPORT_SCISSOR_4_BR                               0x028274
++#define R_028278_PA_SC_VPORT_SCISSOR_5_TL                               0x028278
++#define R_02827C_PA_SC_VPORT_SCISSOR_5_BR                               0x02827C
++#define R_028280_PA_SC_VPORT_SCISSOR_6_TL                               0x028280
++#define R_028284_PA_SC_VPORT_SCISSOR_6_BR                               0x028284
++#define R_028288_PA_SC_VPORT_SCISSOR_7_TL                               0x028288
++#define R_02828C_PA_SC_VPORT_SCISSOR_7_BR                               0x02828C
++#define R_028290_PA_SC_VPORT_SCISSOR_8_TL                               0x028290
++#define R_028294_PA_SC_VPORT_SCISSOR_8_BR                               0x028294
++#define R_028298_PA_SC_VPORT_SCISSOR_9_TL                               0x028298
++#define R_02829C_PA_SC_VPORT_SCISSOR_9_BR                               0x02829C
++#define R_0282A0_PA_SC_VPORT_SCISSOR_10_TL                              0x0282A0
++#define R_0282A4_PA_SC_VPORT_SCISSOR_10_BR                              0x0282A4
++#define R_0282A8_PA_SC_VPORT_SCISSOR_11_TL                              0x0282A8
++#define R_0282AC_PA_SC_VPORT_SCISSOR_11_BR                              0x0282AC
++#define R_0282B0_PA_SC_VPORT_SCISSOR_12_TL                              0x0282B0
++#define R_0282B4_PA_SC_VPORT_SCISSOR_12_BR                              0x0282B4
++#define R_0282B8_PA_SC_VPORT_SCISSOR_13_TL                              0x0282B8
++#define R_0282BC_PA_SC_VPORT_SCISSOR_13_BR                              0x0282BC
++#define R_0282C0_PA_SC_VPORT_SCISSOR_14_TL                              0x0282C0
++#define R_0282C4_PA_SC_VPORT_SCISSOR_14_BR                              0x0282C4
++#define R_0282C8_PA_SC_VPORT_SCISSOR_15_TL                              0x0282C8
++#define R_0282CC_PA_SC_VPORT_SCISSOR_15_BR                              0x0282CC
+ #define R_0282D0_PA_SC_VPORT_ZMIN_0                                     0x0282D0
+ #define R_0282D4_PA_SC_VPORT_ZMAX_0                                     0x0282D4
++#define R_0282D8_PA_SC_VPORT_ZMIN_1                                     0x0282D8
++#define R_0282DC_PA_SC_VPORT_ZMAX_1                                     0x0282DC
++#define R_0282E0_PA_SC_VPORT_ZMIN_2                                     0x0282E0
++#define R_0282E4_PA_SC_VPORT_ZMAX_2                                     0x0282E4
++#define R_0282E8_PA_SC_VPORT_ZMIN_3                                     0x0282E8
++#define R_0282EC_PA_SC_VPORT_ZMAX_3                                     0x0282EC
++#define R_0282F0_PA_SC_VPORT_ZMIN_4                                     0x0282F0
++#define R_0282F4_PA_SC_VPORT_ZMAX_4                                     0x0282F4
++#define R_0282F8_PA_SC_VPORT_ZMIN_5                                     0x0282F8
++#define R_0282FC_PA_SC_VPORT_ZMAX_5                                     0x0282FC
++#define R_028300_PA_SC_VPORT_ZMIN_6                                     0x028300
++#define R_028304_PA_SC_VPORT_ZMAX_6                                     0x028304
++#define R_028308_PA_SC_VPORT_ZMIN_7                                     0x028308
++#define R_02830C_PA_SC_VPORT_ZMAX_7                                     0x02830C
++#define R_028310_PA_SC_VPORT_ZMIN_8                                     0x028310
++#define R_028314_PA_SC_VPORT_ZMAX_8                                     0x028314
++#define R_028318_PA_SC_VPORT_ZMIN_9                                     0x028318
++#define R_02831C_PA_SC_VPORT_ZMAX_9                                     0x02831C
++#define R_028320_PA_SC_VPORT_ZMIN_10                                    0x028320
++#define R_028324_PA_SC_VPORT_ZMAX_10                                    0x028324
++#define R_028328_PA_SC_VPORT_ZMIN_11                                    0x028328
++#define R_02832C_PA_SC_VPORT_ZMAX_11                                    0x02832C
++#define R_028330_PA_SC_VPORT_ZMIN_12                                    0x028330
++#define R_028334_PA_SC_VPORT_ZMAX_12                                    0x028334
++#define R_028338_PA_SC_VPORT_ZMIN_13                                    0x028338
++#define R_02833C_PA_SC_VPORT_ZMAX_13                                    0x02833C
++#define R_028340_PA_SC_VPORT_ZMIN_14                                    0x028340
++#define R_028344_PA_SC_VPORT_ZMAX_14                                    0x028344
++#define R_028348_PA_SC_VPORT_ZMIN_15                                    0x028348
++#define R_02834C_PA_SC_VPORT_ZMAX_15                                    0x02834C
+ #define R_028350_PA_SC_RASTER_CONFIG                                    0x028350
+ #define   S_028350_RB_MAP_PKR0(x)                                     (((x) & 0x03) << 0)
+ #define   G_028350_RB_MAP_PKR0(x)                                     (((x) >> 0) & 0x03)
+@@ -5827,6 +6520,13 @@
+ #define     V_028354_RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE        0x01
+ #define     V_028354_RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE        0x02
+ #define     V_028354_RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE        0x03
++#define R_028358_PA_SC_SCREEN_EXTENT_CONTROL                            0x028358
++#define   S_028358_SLICE_EVEN_ENABLE(x)                               (((x) & 0x03) << 0)
++#define   G_028358_SLICE_EVEN_ENABLE(x)                               (((x) >> 0) & 0x03)
++#define   C_028358_SLICE_EVEN_ENABLE                                  0xFFFFFFFC
++#define   S_028358_SLICE_ODD_ENABLE(x)                                (((x) & 0x03) << 2)
++#define   G_028358_SLICE_ODD_ENABLE(x)                                (((x) >> 2) & 0x03)
++#define   C_028358_SLICE_ODD_ENABLE                                   0xFFFFFFF3
+ /*     */
+ #define R_028400_VGT_MAX_VTX_INDX                                       0x028400
+ #define R_028404_VGT_MIN_VTX_INDX                                       0x028404
+@@ -5836,6 +6536,18 @@
+ #define R_028418_CB_BLEND_GREEN                                         0x028418
+ #define R_02841C_CB_BLEND_BLUE                                          0x02841C
+ #define R_028420_CB_BLEND_ALPHA                                         0x028420
++/* VI */
++#define R_028424_CB_DCC_CONTROL                                         0x028424
++#define   S_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) & 0x1) << 0)
++#define   G_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) & 0x1)
++#define   C_028424_OVERWRITE_COMBINER_DISABLE                         0xFFFFFFFE
++#define   S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) & 0x1) << 1)
++#define   G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) >> 1) & 0x1)
++#define   C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE             0xFFFFFFFD
++#define   S_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) & 0x1F) << 2)
++#define   G_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) >> 2) & 0x1F)
++#define   C_028424_OVERWRITE_COMBINER_WATERMARK                       0xFFFFFF83
++/*    */
+ #define R_02842C_DB_STENCIL_CONTROL                                     0x02842C
+ #define   S_02842C_STENCILFAIL(x)                                     (((x) & 0x0F) << 0)
+ #define   G_02842C_STENCILFAIL(x)                                     (((x) >> 0) & 0x0F)
+@@ -5977,12 +6689,102 @@
+ #define   S_028434_STENCILOPVAL_BF(x)                                 (((x) & 0xFF) << 24)
+ #define   G_028434_STENCILOPVAL_BF(x)                                 (((x) >> 24) & 0xFF)
+ #define   C_028434_STENCILOPVAL_BF                                    0x00FFFFFF
+-#define R_02843C_PA_CL_VPORT_XSCALE_0                                   0x02843C
+-#define R_028440_PA_CL_VPORT_XOFFSET_0                                  0x028440
+-#define R_028444_PA_CL_VPORT_YSCALE_0                                   0x028444
+-#define R_028448_PA_CL_VPORT_YOFFSET_0                                  0x028448
+-#define R_02844C_PA_CL_VPORT_ZSCALE_0                                   0x02844C
+-#define R_028450_PA_CL_VPORT_ZOFFSET_0                                  0x028450
++#define R_02843C_PA_CL_VPORT_XSCALE                                     0x02843C
++#define R_028440_PA_CL_VPORT_XOFFSET                                    0x028440
++#define R_028444_PA_CL_VPORT_YSCALE                                     0x028444
++#define R_028448_PA_CL_VPORT_YOFFSET                                    0x028448
++#define R_02844C_PA_CL_VPORT_ZSCALE                                     0x02844C
++#define R_028450_PA_CL_VPORT_ZOFFSET                                    0x028450
++#define R_028454_PA_CL_VPORT_XSCALE_1                                   0x028454
++#define R_028458_PA_CL_VPORT_XOFFSET_1                                  0x028458
++#define R_02845C_PA_CL_VPORT_YSCALE_1                                   0x02845C
++#define R_028460_PA_CL_VPORT_YOFFSET_1                                  0x028460
++#define R_028464_PA_CL_VPORT_ZSCALE_1                                   0x028464
++#define R_028468_PA_CL_VPORT_ZOFFSET_1                                  0x028468
++#define R_02846C_PA_CL_VPORT_XSCALE_2                                   0x02846C
++#define R_028470_PA_CL_VPORT_XOFFSET_2                                  0x028470
++#define R_028474_PA_CL_VPORT_YSCALE_2                                   0x028474
++#define R_028478_PA_CL_VPORT_YOFFSET_2                                  0x028478
++#define R_02847C_PA_CL_VPORT_ZSCALE_2                                   0x02847C
++#define R_028480_PA_CL_VPORT_ZOFFSET_2                                  0x028480
++#define R_028484_PA_CL_VPORT_XSCALE_3                                   0x028484
++#define R_028488_PA_CL_VPORT_XOFFSET_3                                  0x028488
++#define R_02848C_PA_CL_VPORT_YSCALE_3                                   0x02848C
++#define R_028490_PA_CL_VPORT_YOFFSET_3                                  0x028490
++#define R_028494_PA_CL_VPORT_ZSCALE_3                                   0x028494
++#define R_028498_PA_CL_VPORT_ZOFFSET_3                                  0x028498
++#define R_02849C_PA_CL_VPORT_XSCALE_4                                   0x02849C
++#define R_0284A0_PA_CL_VPORT_XOFFSET_4                                  0x0284A0
++#define R_0284A4_PA_CL_VPORT_YSCALE_4                                   0x0284A4
++#define R_0284A8_PA_CL_VPORT_YOFFSET_4                                  0x0284A8
++#define R_0284AC_PA_CL_VPORT_ZSCALE_4                                   0x0284AC
++#define R_0284B0_PA_CL_VPORT_ZOFFSET_4                                  0x0284B0
++#define R_0284B4_PA_CL_VPORT_XSCALE_5                                   0x0284B4
++#define R_0284B8_PA_CL_VPORT_XOFFSET_5                                  0x0284B8
++#define R_0284BC_PA_CL_VPORT_YSCALE_5                                   0x0284BC
++#define R_0284C0_PA_CL_VPORT_YOFFSET_5                                  0x0284C0
++#define R_0284C4_PA_CL_VPORT_ZSCALE_5                                   0x0284C4
++#define R_0284C8_PA_CL_VPORT_ZOFFSET_5                                  0x0284C8
++#define R_0284CC_PA_CL_VPORT_XSCALE_6                                   0x0284CC
++#define R_0284D0_PA_CL_VPORT_XOFFSET_6                                  0x0284D0
++#define R_0284D4_PA_CL_VPORT_YSCALE_6                                   0x0284D4
++#define R_0284D8_PA_CL_VPORT_YOFFSET_6                                  0x0284D8
++#define R_0284DC_PA_CL_VPORT_ZSCALE_6                                   0x0284DC
++#define R_0284E0_PA_CL_VPORT_ZOFFSET_6                                  0x0284E0
++#define R_0284E4_PA_CL_VPORT_XSCALE_7                                   0x0284E4
++#define R_0284E8_PA_CL_VPORT_XOFFSET_7                                  0x0284E8
++#define R_0284EC_PA_CL_VPORT_YSCALE_7                                   0x0284EC
++#define R_0284F0_PA_CL_VPORT_YOFFSET_7                                  0x0284F0
++#define R_0284F4_PA_CL_VPORT_ZSCALE_7                                   0x0284F4
++#define R_0284F8_PA_CL_VPORT_ZOFFSET_7                                  0x0284F8
++#define R_0284FC_PA_CL_VPORT_XSCALE_8                                   0x0284FC
++#define R_028500_PA_CL_VPORT_XOFFSET_8                                  0x028500
++#define R_028504_PA_CL_VPORT_YSCALE_8                                   0x028504
++#define R_028508_PA_CL_VPORT_YOFFSET_8                                  0x028508
++#define R_02850C_PA_CL_VPORT_ZSCALE_8                                   0x02850C
++#define R_028510_PA_CL_VPORT_ZOFFSET_8                                  0x028510
++#define R_028514_PA_CL_VPORT_XSCALE_9                                   0x028514
++#define R_028518_PA_CL_VPORT_XOFFSET_9                                  0x028518
++#define R_02851C_PA_CL_VPORT_YSCALE_9                                   0x02851C
++#define R_028520_PA_CL_VPORT_YOFFSET_9                                  0x028520
++#define R_028524_PA_CL_VPORT_ZSCALE_9                                   0x028524
++#define R_028528_PA_CL_VPORT_ZOFFSET_9                                  0x028528
++#define R_02852C_PA_CL_VPORT_XSCALE_10                                  0x02852C
++#define R_028530_PA_CL_VPORT_XOFFSET_10                                 0x028530
++#define R_028534_PA_CL_VPORT_YSCALE_10                                  0x028534
++#define R_028538_PA_CL_VPORT_YOFFSET_10                                 0x028538
++#define R_02853C_PA_CL_VPORT_ZSCALE_10                                  0x02853C
++#define R_028540_PA_CL_VPORT_ZOFFSET_10                                 0x028540
++#define R_028544_PA_CL_VPORT_XSCALE_11                                  0x028544
++#define R_028548_PA_CL_VPORT_XOFFSET_11                                 0x028548
++#define R_02854C_PA_CL_VPORT_YSCALE_11                                  0x02854C
++#define R_028550_PA_CL_VPORT_YOFFSET_11                                 0x028550
++#define R_028554_PA_CL_VPORT_ZSCALE_11                                  0x028554
++#define R_028558_PA_CL_VPORT_ZOFFSET_11                                 0x028558
++#define R_02855C_PA_CL_VPORT_XSCALE_12                                  0x02855C
++#define R_028560_PA_CL_VPORT_XOFFSET_12                                 0x028560
++#define R_028564_PA_CL_VPORT_YSCALE_12                                  0x028564
++#define R_028568_PA_CL_VPORT_YOFFSET_12                                 0x028568
++#define R_02856C_PA_CL_VPORT_ZSCALE_12                                  0x02856C
++#define R_028570_PA_CL_VPORT_ZOFFSET_12                                 0x028570
++#define R_028574_PA_CL_VPORT_XSCALE_13                                  0x028574
++#define R_028578_PA_CL_VPORT_XOFFSET_13                                 0x028578
++#define R_02857C_PA_CL_VPORT_YSCALE_13                                  0x02857C
++#define R_028580_PA_CL_VPORT_YOFFSET_13                                 0x028580
++#define R_028584_PA_CL_VPORT_ZSCALE_13                                  0x028584
++#define R_028588_PA_CL_VPORT_ZOFFSET_13                                 0x028588
++#define R_02858C_PA_CL_VPORT_XSCALE_14                                  0x02858C
++#define R_028590_PA_CL_VPORT_XOFFSET_14                                 0x028590
++#define R_028594_PA_CL_VPORT_YSCALE_14                                  0x028594
++#define R_028598_PA_CL_VPORT_YOFFSET_14                                 0x028598
++#define R_02859C_PA_CL_VPORT_ZSCALE_14                                  0x02859C
++#define R_0285A0_PA_CL_VPORT_ZOFFSET_14                                 0x0285A0
++#define R_0285A4_PA_CL_VPORT_XSCALE_15                                  0x0285A4
++#define R_0285A8_PA_CL_VPORT_XOFFSET_15                                 0x0285A8
++#define R_0285AC_PA_CL_VPORT_YSCALE_15                                  0x0285AC
++#define R_0285B0_PA_CL_VPORT_YOFFSET_15                                 0x0285B0
++#define R_0285B4_PA_CL_VPORT_ZSCALE_15                                  0x0285B4
++#define R_0285B8_PA_CL_VPORT_ZOFFSET_15                                 0x0285B8
+ #define R_0285BC_PA_CL_UCP_0_X                                          0x0285BC
+ #define R_0285C0_PA_CL_UCP_0_Y                                          0x0285C0
+ #define R_0285C4_PA_CL_UCP_0_Z                                          0x0285C4
+@@ -6029,6 +6831,26 @@
+ #define   G_028644_DUP(x)                                             (((x) >> 18) & 0x1)
+ #define   C_028644_DUP                                                0xFFFBFFFF
+ /*     */
++/* VI */
++#define   S_028644_FP16_INTERP_MODE(x)                                (((x) & 0x1) << 19)
++#define   G_028644_FP16_INTERP_MODE(x)                                (((x) >> 19) & 0x1)
++#define   C_028644_FP16_INTERP_MODE                                   0xFFF7FFFF
++#define   S_028644_USE_DEFAULT_ATTR1(x)                               (((x) & 0x1) << 20)
++#define   G_028644_USE_DEFAULT_ATTR1(x)                               (((x) >> 20) & 0x1)
++#define   C_028644_USE_DEFAULT_ATTR1                                  0xFFEFFFFF
++#define   S_028644_DEFAULT_VAL_ATTR1(x)                               (((x) & 0x03) << 21)
++#define   G_028644_DEFAULT_VAL_ATTR1(x)                               (((x) >> 21) & 0x03)
++#define   C_028644_DEFAULT_VAL_ATTR1                                  0xFF9FFFFF
++#define   S_028644_PT_SPRITE_TEX_ATTR1(x)                             (((x) & 0x1) << 23)
++#define   G_028644_PT_SPRITE_TEX_ATTR1(x)                             (((x) >> 23) & 0x1)
++#define   C_028644_PT_SPRITE_TEX_ATTR1                                0xFF7FFFFF
++#define   S_028644_ATTR0_VALID(x)                                     (((x) & 0x1) << 24)
++#define   G_028644_ATTR0_VALID(x)                                     (((x) >> 24) & 0x1)
++#define   C_028644_ATTR0_VALID                                        0xFEFFFFFF
++#define   S_028644_ATTR1_VALID(x)                                     (((x) & 0x1) << 25)
++#define   G_028644_ATTR1_VALID(x)                                     (((x) >> 25) & 0x1)
++#define   C_028644_ATTR1_VALID                                        0xFDFFFFFF
++/*    */
+ #define R_028648_SPI_PS_INPUT_CNTL_1                                    0x028648
+ #define R_02864C_SPI_PS_INPUT_CNTL_2                                    0x02864C
+ #define R_028650_SPI_PS_INPUT_CNTL_3                                    0x028650
+@@ -6552,6 +7374,10 @@
+ #define R_028794_CB_BLEND5_CONTROL                                      0x028794
+ #define R_028798_CB_BLEND6_CONTROL                                      0x028798
+ #define R_02879C_CB_BLEND7_CONTROL                                      0x02879C
++#define R_0287CC_CS_COPY_STATE                                          0x0287CC
++#define   S_0287CC_SRC_STATE_ID(x)                                    (((x) & 0x07) << 0)
++#define   G_0287CC_SRC_STATE_ID(x)                                    (((x) >> 0) & 0x07)
++#define   C_0287CC_SRC_STATE_ID                                       0xFFFFFFF8
+ #define R_0287D4_PA_CL_POINT_X_RAD                                      0x0287D4
+ #define R_0287D8_PA_CL_POINT_Y_RAD                                      0x0287D8
+ #define R_0287DC_PA_CL_POINT_SIZE                                       0x0287DC
+@@ -6581,6 +7407,10 @@
+ #define   G_0287F0_USE_OPAQUE(x)                                      (((x) >> 6) & 0x1)
+ #define   C_0287F0_USE_OPAQUE                                         0xFFFFFFBF
+ #define R_0287F4_VGT_IMMED_DATA                                         0x0287F4 /* not on CIK */
++#define R_0287F8_VGT_EVENT_ADDRESS_REG                                  0x0287F8
++#define   S_0287F8_ADDRESS_LOW(x)                                     (((x) & 0xFFFFFFF) << 0)
++#define   G_0287F8_ADDRESS_LOW(x)                                     (((x) >> 0) & 0xFFFFFFF)
++#define   C_0287F8_ADDRESS_LOW                                        0xF0000000
+ #define R_028800_DB_DEPTH_CONTROL                                       0x028800
+ #define   S_028800_STENCIL_ENABLE(x)                                  (((x) & 0x1) << 0)
+ #define   G_028800_STENCIL_ENABLE(x)                                  (((x) >> 0) & 0x1)
+@@ -6637,36 +7467,42 @@
+ #define   G_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(x)              (((x) >> 31) & 0x1)
+ #define   C_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS                 0x7FFFFFFF
+ #define R_028804_DB_EQAA                                                0x028804
+-#define   S_028804_MAX_ANCHOR_SAMPLES(x)		(((x) & 0x7) << 0)
+-#define   G_028804_MAX_ANCHOR_SAMPLES(x)		(((x) >> 0) & 0x7)
+-#define   C_028804_MAX_ANCHOR_SAMPLES			(~(((~0) & 0x7) << 0))
+-#define   S_028804_PS_ITER_SAMPLES(x)			(((x) & 0x7) << 4)
+-#define   G_028804_PS_ITER_SAMPLES(x)			(((x) >> 4) & 0x7)
+-#define   C_028804_PS_ITER_SAMPLES			(~(((~0) & 0x7) << 4))
+-#define   S_028804_MASK_EXPORT_NUM_SAMPLES(x)		(((x) & 0x7) << 8)
+-#define   G_028804_MASK_EXPORT_NUM_SAMPLES(x)		(((x) >> 8) & 0x7)
+-#define   C_028804_MASK_EXPORT_NUM_SAMPLES		(~(((~0) & 0x7) << 8))
+-#define   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)		(((x) & 0x7) << 12)
+-#define   G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)		(((x) >> 12) & 0x7)
+-#define   C_028804_ALPHA_TO_MASK_NUM_SAMPLES		(~(((~0) & 0x7) << 12))
+-#define   S_028804_HIGH_QUALITY_INTERSECTIONS(x)	(((x) & 0x1) << 16)
+-#define   G_028804_HIGH_QUALITY_INTERSECTIONS(x)	(((x) >> 16) & 0x1)
+-#define   C_028804_HIGH_QUALITY_INTERSECTIONS		(~(((~0) & 0x1) << 16))
+-#define   S_028804_INCOHERENT_EQAA_READS(x)		(((x) & 0x1) << 17)
+-#define   G_028804_INCOHERENT_EQAA_READS(x)		(((x) >> 17) & 0x1)
+-#define   C_028804_INCOHERENT_EQAA_READS		(~(((~0) & 0x1) << 17))
+-#define   S_028804_INTERPOLATE_COMP_Z(x)		(((x) & 0x1) << 18)
+-#define   G_028804_INTERPOLATE_COMP_Z(x)		(((x) >> 18) & 0x1)
+-#define   C_028804_INTERPOLATE_COMP_Z			(~(((~0) >> 18) & 0x1))
+-#define   S_028804_INTERPOLATE_SRC_Z(x)			(((x) & 0x1) << 19)
+-#define   G_028804_INTERPOLATE_SRC_Z(x)			(((x) >> 19) & 0x1)
+-#define   C_028804_INTERPOLATE_SRC_Z			(~(((~0) & 0x1) << 19))
+-#define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) & 0x1) << 20)
+-#define   G_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) >> 20) & 0x1)
+-#define   C_028804_STATIC_ANCHOR_ASSOCIATIONS		(~(((~0) & 0x1) << 20))
+-#define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) & 0x1) << 21)
+-#define   G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) >> 21) & 0x1)
+-#define   C_028804_ALPHA_TO_MASK_EQAA_DISABLE		(~(((~0) & 0x1) << 21))
++#define   S_028804_MAX_ANCHOR_SAMPLES(x)                              (((x) & 0x7) << 0)
++#define   G_028804_MAX_ANCHOR_SAMPLES(x)                              (((x) >> 0) & 0x07)
++#define   C_028804_MAX_ANCHOR_SAMPLES                                 0xFFFFFFF8
++#define   S_028804_PS_ITER_SAMPLES(x)                                 (((x) & 0x7) << 4)
++#define   G_028804_PS_ITER_SAMPLES(x)                                 (((x) >> 4) & 0x07)
++#define   C_028804_PS_ITER_SAMPLES                                    0xFFFFFF8F
++#define   S_028804_MASK_EXPORT_NUM_SAMPLES(x)                         (((x) & 0x7) << 8)
++#define   G_028804_MASK_EXPORT_NUM_SAMPLES(x)                         (((x) >> 8) & 0x07)
++#define   C_028804_MASK_EXPORT_NUM_SAMPLES                            0xFFFFF8FF
++#define   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)                       (((x) & 0x7) << 12)
++#define   G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)                       (((x) >> 12) & 0x07)
++#define   C_028804_ALPHA_TO_MASK_NUM_SAMPLES                          0xFFFF8FFF
++#define   S_028804_HIGH_QUALITY_INTERSECTIONS(x)                      (((x) & 0x1) << 16)
++#define   G_028804_HIGH_QUALITY_INTERSECTIONS(x)                      (((x) >> 16) & 0x1)
++#define   C_028804_HIGH_QUALITY_INTERSECTIONS                         0xFFFEFFFF
++#define   S_028804_INCOHERENT_EQAA_READS(x)                           (((x) & 0x1) << 17)
++#define   G_028804_INCOHERENT_EQAA_READS(x)                           (((x) >> 17) & 0x1)
++#define   C_028804_INCOHERENT_EQAA_READS                              0xFFFDFFFF
++#define   S_028804_INTERPOLATE_COMP_Z(x)                              (((x) & 0x1) << 18)
++#define   G_028804_INTERPOLATE_COMP_Z(x)                              (((x) >> 18) & 0x1)
++#define   C_028804_INTERPOLATE_COMP_Z                                 0xFFFBFFFF
++#define   S_028804_INTERPOLATE_SRC_Z(x)                               (((x) & 0x1) << 19)
++#define   G_028804_INTERPOLATE_SRC_Z(x)                               (((x) >> 19) & 0x1)
++#define   C_028804_INTERPOLATE_SRC_Z                                  0xFFF7FFFF
++#define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)                      (((x) & 0x1) << 20)
++#define   G_028804_STATIC_ANCHOR_ASSOCIATIONS(x)                      (((x) >> 20) & 0x1)
++#define   C_028804_STATIC_ANCHOR_ASSOCIATIONS                         0xFFEFFFFF
++#define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)                      (((x) & 0x1) << 21)
++#define   G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)                      (((x) >> 21) & 0x1)
++#define   C_028804_ALPHA_TO_MASK_EQAA_DISABLE                         0xFFDFFFFF
++#define   S_028804_OVERRASTERIZATION_AMOUNT(x)                        (((x) & 0x07) << 24)
++#define   G_028804_OVERRASTERIZATION_AMOUNT(x)                        (((x) >> 24) & 0x07)
++#define   C_028804_OVERRASTERIZATION_AMOUNT                           0xF8FFFFFF
++#define   S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) & 0x1) << 27)
++#define   G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) >> 27) & 0x1)
++#define   C_028804_ENABLE_POSTZ_OVERRASTERIZATION                     0xF7FFFFFF
+ #define R_028808_CB_COLOR_CONTROL                                       0x028808
+ #define   S_028808_DEGAMMA_ENABLE(x)                                  (((x) & 0x1) << 3)
+ #define   G_028808_DEGAMMA_ENABLE(x)                                  (((x) >> 3) & 0x1)
+@@ -6970,6 +7806,11 @@
+ #define   S_02881C_USE_VTX_GS_CUT_FLAG(x)                             (((x) & 0x1) << 25)
+ #define   G_02881C_USE_VTX_GS_CUT_FLAG(x)                             (((x) >> 25) & 0x1)
+ #define   C_02881C_USE_VTX_GS_CUT_FLAG                                0xFDFFFFFF
++/* VI */
++#define   S_02881C_USE_VTX_LINE_WIDTH(x)                              (((x) & 0x1) << 26)
++#define   G_02881C_USE_VTX_LINE_WIDTH(x)                              (((x) >> 26) & 0x1)
++#define   C_02881C_USE_VTX_LINE_WIDTH                                 0xFBFFFFFF
++/*    */
+ #define R_028820_PA_CL_NANINF_CNTL                                      0x028820
+ #define   S_028820_VTE_XY_INF_DISCARD(x)                              (((x) & 0x1) << 0)
+ #define   G_028820_VTE_XY_INF_DISCARD(x)                              (((x) >> 0) & 0x1)
+@@ -7440,9 +8281,21 @@
+ #define   S_028A4C_PS_ITER_SAMPLE(x)                                  (((x) & 0x1) << 16)
+ #define   G_028A4C_PS_ITER_SAMPLE(x)                                  (((x) >> 16) & 0x1)
+ #define   C_028A4C_PS_ITER_SAMPLE                                     0xFFFEFFFF
+-#define   S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x)                   (((x) & 0x1) << 17)
+-#define   G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x)                   (((x) >> 17) & 0x1)
+-#define   C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC                      0xFFFDFFFF
++#define   S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x)         (((x) & 0x1) << 17)
++#define   G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x)         (((x) >> 17) & 0x1)
++#define   C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE            0xFFFDFFFF
++#define   S_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x)                      (((x) & 0x1) << 18)
++#define   G_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x)                      (((x) >> 18) & 0x1)
++#define   C_028A4C_MULTI_GPU_SUPERTILE_ENABLE                         0xFFFBFFFF
++#define   S_028A4C_GPU_ID_OVERRIDE_ENABLE(x)                          (((x) & 0x1) << 19)
++#define   G_028A4C_GPU_ID_OVERRIDE_ENABLE(x)                          (((x) >> 19) & 0x1)
++#define   C_028A4C_GPU_ID_OVERRIDE_ENABLE                             0xFFF7FFFF
++#define   S_028A4C_GPU_ID_OVERRIDE(x)                                 (((x) & 0x0F) << 20)
++#define   G_028A4C_GPU_ID_OVERRIDE(x)                                 (((x) >> 20) & 0x0F)
++#define   C_028A4C_GPU_ID_OVERRIDE                                    0xFF0FFFFF
++#define   S_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x)                   (((x) & 0x1) << 24)
++#define   G_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x)                   (((x) >> 24) & 0x1)
++#define   C_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE                      0xFEFFFFFF
+ #define   S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)                         (((x) & 0x1) << 25)
+ #define   G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)                         (((x) >> 25) & 0x1)
+ #define   C_028A4C_FORCE_EOV_CNTDWN_ENABLE                            0xFDFFFFFF
+@@ -7508,6 +8361,7 @@
+ #define   C_028A7C_INDEX_TYPE                                         0xFFFFFFFC
+ #define     V_028A7C_VGT_INDEX_16                                   0x00
+ #define     V_028A7C_VGT_INDEX_32                                   0x01
++#define     V_028A7C_VGT_INDEX_8                                    0x02 /* VI */
+ #define   S_028A7C_SWAP_MODE(x)                                       (((x) & 0x03) << 2)
+ #define   G_028A7C_SWAP_MODE(x)                                       (((x) >> 2) & 0x03)
+ #define   C_028A7C_SWAP_MODE                                          0xFFFFFFF3
+@@ -7537,6 +8391,12 @@
+ #define   G_028A7C_REQ_PATH(x)                                        (((x) >> 10) & 0x1)
+ #define   C_028A7C_REQ_PATH                                           0xFFFFFBFF
+ /*     */
++/* VI */
++#define   S_028A7C_MTYPE(x)                                           (((x) & 0x03) << 11)
++#define   G_028A7C_MTYPE(x)                                           (((x) >> 11) & 0x03)
++#define   C_028A7C_MTYPE                                              0xFFFFE7FF
++/*    */
++#define R_028A80_WD_ENHANCE                                             0x028A80
+ #define R_028A84_VGT_PRIMITIVEID_EN                                     0x028A84
+ #define   S_028A84_PRIMITIVEID_EN(x)                                  (((x) & 0x1) << 0)
+ #define   G_028A84_PRIMITIVEID_EN(x)                                  (((x) >> 0) & 0x1)
+@@ -7635,6 +8495,10 @@
+ #define   S_028AA8_WD_SWITCH_ON_EOP(x)                                (((x) & 0x1) << 20)
+ #define   G_028AA8_WD_SWITCH_ON_EOP(x)                                (((x) >> 20) & 0x1)
+ #define   C_028AA8_WD_SWITCH_ON_EOP                                   0xFFEFFFFF
++/* VI */
++#define   S_028AA8_MAX_PRIMGRP_IN_WAVE(x)                             (((x) & 0x0F) << 28)
++#define   G_028AA8_MAX_PRIMGRP_IN_WAVE(x)                             (((x) >> 28) & 0x0F)
++#define   C_028AA8_MAX_PRIMGRP_IN_WAVE                                0x0FFFFFFF
+ /*     */
+ #define R_028AAC_VGT_ESGS_RING_ITEMSIZE                                 0x028AAC
+ #define   S_028AAC_ITEMSIZE(x)                                        (((x) & 0x7FFF) << 0)
+@@ -7674,6 +8538,11 @@
+ #define   S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x)                         (((x) & 0x1) << 16)
+ #define   G_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x)                         (((x) >> 16) & 0x1)
+ #define   C_028ABC_DST_OUTSIDE_ZERO_TO_ONE                            0xFFFEFFFF
++/* VI */
++#define   S_028ABC_TC_COMPATIBLE(x)                                   (((x) & 0x1) << 17)
++#define   G_028ABC_TC_COMPATIBLE(x)                                   (((x) >> 17) & 0x1)
++#define   C_028ABC_TC_COMPATIBLE                                      0xFFFDFFFF
++/*    */
+ #define R_028AC0_DB_SRESULTS_COMPARE_STATE0                             0x028AC0
+ #define   S_028AC0_COMPAREFUNC0(x)                                    (((x) & 0x07) << 0)
+ #define   G_028AC0_COMPAREFUNC0(x)                                    (((x) >> 0) & 0x07)
+@@ -7763,6 +8632,21 @@
+ #define   S_028B38_MAX_VERT_OUT(x)                                    (((x) & 0x7FF) << 0)
+ #define   G_028B38_MAX_VERT_OUT(x)                                    (((x) >> 0) & 0x7FF)
+ #define   C_028B38_MAX_VERT_OUT                                       0xFFFFF800
++/* VI */
++#define R_028B50_VGT_TESS_DISTRIBUTION                                  0x028B50
++#define   S_028B50_ACCUM_ISOLINE(x)                                   (((x) & 0xFF) << 0)
++#define   G_028B50_ACCUM_ISOLINE(x)                                   (((x) >> 0) & 0xFF)
++#define   C_028B50_ACCUM_ISOLINE                                      0xFFFFFF00
++#define   S_028B50_ACCUM_TRI(x)                                       (((x) & 0xFF) << 8)
++#define   G_028B50_ACCUM_TRI(x)                                       (((x) >> 8) & 0xFF)
++#define   C_028B50_ACCUM_TRI                                          0xFFFF00FF
++#define   S_028B50_ACCUM_QUAD(x)                                      (((x) & 0xFF) << 16)
++#define   G_028B50_ACCUM_QUAD(x)                                      (((x) >> 16) & 0xFF)
++#define   C_028B50_ACCUM_QUAD                                         0xFF00FFFF
++#define   S_028B50_DONUT_SPLIT(x)                                     (((x) & 0xFF) << 24)
++#define   G_028B50_DONUT_SPLIT(x)                                     (((x) >> 24) & 0xFF)
++#define   C_028B50_DONUT_SPLIT                                        0x00FFFFFF
++/*    */
+ #define R_028B54_VGT_SHADER_STAGES_EN                                   0x028B54
+ #define   S_028B54_LS_EN(x)                                           (((x) & 0x03) << 0)
+ #define   G_028B54_LS_EN(x)                                           (((x) >> 0) & 0x03)
+@@ -7791,6 +8675,20 @@
+ #define   S_028B54_DYNAMIC_HS(x)                                      (((x) & 0x1) << 8)
+ #define   G_028B54_DYNAMIC_HS(x)                                      (((x) >> 8) & 0x1)
+ #define   C_028B54_DYNAMIC_HS                                         0xFFFFFEFF
++/* VI */
++#define   S_028B54_DISPATCH_DRAW_EN(x)                                (((x) & 0x1) << 9)
++#define   G_028B54_DISPATCH_DRAW_EN(x)                                (((x) >> 9) & 0x1)
++#define   C_028B54_DISPATCH_DRAW_EN                                   0xFFFFFDFF
++#define   S_028B54_DIS_DEALLOC_ACCUM_0(x)                             (((x) & 0x1) << 10)
++#define   G_028B54_DIS_DEALLOC_ACCUM_0(x)                             (((x) >> 10) & 0x1)
++#define   C_028B54_DIS_DEALLOC_ACCUM_0                                0xFFFFFBFF
++#define   S_028B54_DIS_DEALLOC_ACCUM_1(x)                             (((x) & 0x1) << 11)
++#define   G_028B54_DIS_DEALLOC_ACCUM_1(x)                             (((x) >> 11) & 0x1)
++#define   C_028B54_DIS_DEALLOC_ACCUM_1                                0xFFFFF7FF
++#define   S_028B54_VS_WAVE_ID_EN(x)                                   (((x) & 0x1) << 12)
++#define   G_028B54_VS_WAVE_ID_EN(x)                                   (((x) >> 12) & 0x1)
++#define   C_028B54_VS_WAVE_ID_EN                                      0xFFFFEFFF
++/*    */
+ #define R_028B58_VGT_LS_HS_CONFIG                                       0x028B58
+ #define   S_028B58_NUM_PATCHES(x)                                     (((x) & 0xFF) << 0)
+ #define   G_028B58_NUM_PATCHES(x)                                     (((x) >> 0) & 0xFF)
+@@ -7841,6 +8739,9 @@
+ #define   S_028B6C_RESERVED_REDUC_AXIS(x)                             (((x) & 0x1) << 8) /* not on CIK */
+ #define   G_028B6C_RESERVED_REDUC_AXIS(x)                             (((x) >> 8) & 0x1) /* not on CIK */
+ #define   C_028B6C_RESERVED_REDUC_AXIS                                0xFFFFFEFF /* not on CIK */
++#define   S_028B6C_DEPRECATED(x)                                      (((x) & 0x1) << 9)
++#define   G_028B6C_DEPRECATED(x)                                      (((x) >> 9) & 0x1)
++#define   C_028B6C_DEPRECATED                                         0xFFFFFDFF
+ #define   S_028B6C_NUM_DS_WAVES_PER_SIMD(x)                           (((x) & 0x0F) << 10)
+ #define   G_028B6C_NUM_DS_WAVES_PER_SIMD(x)                           (((x) >> 10) & 0x0F)
+ #define   C_028B6C_NUM_DS_WAVES_PER_SIMD                              0xFFFFC3FF
+@@ -7855,6 +8756,14 @@
+ #define     V_028B6C_VGT_POLICY_STREAM                              0x01
+ #define     V_028B6C_VGT_POLICY_BYPASS                              0x02
+ /*     */
++/* VI */
++#define   S_028B6C_DISTRIBUTION_MODE(x)                               (((x) & 0x03) << 17)
++#define   G_028B6C_DISTRIBUTION_MODE(x)                               (((x) >> 17) & 0x03)
++#define   C_028B6C_DISTRIBUTION_MODE                                  0xFFF9FFFF
++#define   S_028B6C_MTYPE(x)                                           (((x) & 0x03) << 19)
++#define   G_028B6C_MTYPE(x)                                           (((x) >> 19) & 0x03)
++#define   C_028B6C_MTYPE                                              0xFFE7FFFF
++/*    */
+ #define R_028B70_DB_ALPHA_TO_MASK                                       0x028B70
+ #define   S_028B70_ALPHA_TO_MASK_ENABLE(x)                            (((x) & 0x1) << 0)
+ #define   G_028B70_ALPHA_TO_MASK_ENABLE(x)                            (((x) >> 0) & 0x1)
+@@ -7994,6 +8903,22 @@
+ #define   S_028BDC_DX10_DIAMOND_TEST_ENA(x)                           (((x) & 0x1) << 12)
+ #define   G_028BDC_DX10_DIAMOND_TEST_ENA(x)                           (((x) >> 12) & 0x1)
+ #define   C_028BDC_DX10_DIAMOND_TEST_ENA                              0xFFFFEFFF
++#define R_028BE0_PA_SC_AA_CONFIG                                        0x028BE0
++#define   S_028BE0_MSAA_NUM_SAMPLES(x)                                (((x) & 0x7) << 0)
++#define   G_028BE0_MSAA_NUM_SAMPLES(x)                                (((x) >> 0) & 0x07)
++#define   C_028BE0_MSAA_NUM_SAMPLES                                   0xFFFFFFF8
++#define   S_028BE0_AA_MASK_CENTROID_DTMN(x)                           (((x) & 0x1) << 4)
++#define   G_028BE0_AA_MASK_CENTROID_DTMN(x)                           (((x) >> 4) & 0x1)
++#define   C_028BE0_AA_MASK_CENTROID_DTMN                              0xFFFFFFEF
++#define   S_028BE0_MAX_SAMPLE_DIST(x)                                 (((x) & 0xf) << 13)
++#define   G_028BE0_MAX_SAMPLE_DIST(x)                                 (((x) >> 13) & 0x0F)
++#define   C_028BE0_MAX_SAMPLE_DIST                                    0xFFFE1FFF
++#define   S_028BE0_MSAA_EXPOSED_SAMPLES(x)                            (((x) & 0x7) << 20)
++#define   G_028BE0_MSAA_EXPOSED_SAMPLES(x)                            (((x) >> 20) & 0x07)
++#define   C_028BE0_MSAA_EXPOSED_SAMPLES                               0xFF8FFFFF
++#define   S_028BE0_DETAIL_TO_EXPOSED_MODE(x)                          (((x) & 0x3) << 24)
++#define   G_028BE0_DETAIL_TO_EXPOSED_MODE(x)                          (((x) >> 24) & 0x03)
++#define   C_028BE0_DETAIL_TO_EXPOSED_MODE                             0xFCFFFFFF
+ #define R_028BE4_PA_SU_VTX_CNTL                                         0x028BE4
+ #define   S_028BE4_PIX_CENTER(x)                                      (((x) & 0x1) << 0)
+ #define   G_028BE4_PIX_CENTER(x)                                      (((x) >> 0) & 0x1)
+@@ -8562,6 +9487,17 @@
+ #define   G_028C70_FMASK_COMPRESSION_DISABLE(x)                       (((x) >> 26) & 0x1)
+ #define   C_028C70_FMASK_COMPRESSION_DISABLE                          0xFBFFFFFF
+ /*     */
++/* VI */
++#define   S_028C70_FMASK_COMPRESS_1FRAG_ONLY(x)                       (((x) & 0x1) << 27)
++#define   G_028C70_FMASK_COMPRESS_1FRAG_ONLY(x)                       (((x) >> 27) & 0x1)
++#define   C_028C70_FMASK_COMPRESS_1FRAG_ONLY                          0xF7FFFFFF
++#define   S_028C70_DCC_ENABLE(x)                                      (((x) & 0x1) << 28)
++#define   G_028C70_DCC_ENABLE(x)                                      (((x) >> 28) & 0x1)
++#define   C_028C70_DCC_ENABLE                                         0xEFFFFFFF
++#define   S_028C70_CMASK_ADDR_TYPE(x)                                 (((x) & 0x03) << 29)
++#define   G_028C70_CMASK_ADDR_TYPE(x)                                 (((x) >> 29) & 0x03)
++#define   C_028C70_CMASK_ADDR_TYPE                                    0x9FFFFFFF
++/*    */
+ #define R_028C74_CB_COLOR0_ATTRIB                                       0x028C74
+ #define   S_028C74_TILE_MODE_INDEX(x)                                 (((x) & 0x1F) << 0)
+ #define   G_028C74_TILE_MODE_INDEX(x)                                 (((x) >> 0) & 0x1F)
+@@ -8569,7 +9505,9 @@
+ #define   S_028C74_FMASK_TILE_MODE_INDEX(x)                           (((x) & 0x1F) << 5)
+ #define   G_028C74_FMASK_TILE_MODE_INDEX(x)                           (((x) >> 5) & 0x1F)
+ #define   C_028C74_FMASK_TILE_MODE_INDEX                              0xFFFFFC1F
+-#define   S_028C74_FMASK_BANK_HEIGHT(x)				      (((x) & 0x3) << 10) /* SI errata */
++#define   S_028C74_FMASK_BANK_HEIGHT(x)                               (((x) & 0x03) << 10)
++#define   G_028C74_FMASK_BANK_HEIGHT(x)                               (((x) >> 10) & 0x03)
++#define   C_028C74_FMASK_BANK_HEIGHT                                  0xFFFFF3FF
+ #define   S_028C74_NUM_SAMPLES(x)                                     (((x) & 0x07) << 12)
+ #define   G_028C74_NUM_SAMPLES(x)                                     (((x) >> 12) & 0x07)
+ #define   C_028C74_NUM_SAMPLES                                        0xFFFF8FFF
+@@ -8579,6 +9517,36 @@
+ #define   S_028C74_FORCE_DST_ALPHA_1(x)                               (((x) & 0x1) << 17)
+ #define   G_028C74_FORCE_DST_ALPHA_1(x)                               (((x) >> 17) & 0x1)
+ #define   C_028C74_FORCE_DST_ALPHA_1                                  0xFFFDFFFF
++/* VI */
++#define R_028C78_CB_COLOR0_DCC_CONTROL                                  0x028C78
++#define   S_028C78_OVERWRITE_COMBINER_DISABLE(x)                      (((x) & 0x1) << 0)
++#define   G_028C78_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) & 0x1)
++#define   C_028C78_OVERWRITE_COMBINER_DISABLE                         0xFFFFFFFE
++#define   S_028C78_KEY_CLEAR_ENABLE(x)                                (((x) & 0x1) << 1)
++#define   G_028C78_KEY_CLEAR_ENABLE(x)                                (((x) >> 1) & 0x1)
++#define   C_028C78_KEY_CLEAR_ENABLE                                   0xFFFFFFFD
++#define   S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x)                     (((x) & 0x03) << 2)
++#define   G_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x)                     (((x) >> 2) & 0x03)
++#define   C_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE                        0xFFFFFFF3
++#define   S_028C78_MIN_COMPRESSED_BLOCK_SIZE(x)                       (((x) & 0x1) << 4)
++#define   G_028C78_MIN_COMPRESSED_BLOCK_SIZE(x)                       (((x) >> 4) & 0x1)
++#define   C_028C78_MIN_COMPRESSED_BLOCK_SIZE                          0xFFFFFFEF
++#define   S_028C78_MAX_COMPRESSED_BLOCK_SIZE(x)                       (((x) & 0x03) << 5)
++#define   G_028C78_MAX_COMPRESSED_BLOCK_SIZE(x)                       (((x) >> 5) & 0x03)
++#define   C_028C78_MAX_COMPRESSED_BLOCK_SIZE                          0xFFFFFF9F
++#define   S_028C78_COLOR_TRANSFORM(x)                                 (((x) & 0x03) << 7)
++#define   G_028C78_COLOR_TRANSFORM(x)                                 (((x) >> 7) & 0x03)
++#define   C_028C78_COLOR_TRANSFORM                                    0xFFFFFE7F
++#define   S_028C78_INDEPENDENT_64B_BLOCKS(x)                          (((x) & 0x1) << 9)
++#define   G_028C78_INDEPENDENT_64B_BLOCKS(x)                          (((x) >> 9) & 0x1)
++#define   C_028C78_INDEPENDENT_64B_BLOCKS                             0xFFFFFDFF
++#define   S_028C78_LOSSY_RGB_PRECISION(x)                             (((x) & 0x0F) << 10)
++#define   G_028C78_LOSSY_RGB_PRECISION(x)                             (((x) >> 10) & 0x0F)
++#define   C_028C78_LOSSY_RGB_PRECISION                                0xFFFFC3FF
++#define   S_028C78_LOSSY_ALPHA_PRECISION(x)                           (((x) & 0x0F) << 14)
++#define   G_028C78_LOSSY_ALPHA_PRECISION(x)                           (((x) >> 14) & 0x0F)
++#define   C_028C78_LOSSY_ALPHA_PRECISION                              0xFFFC3FFF
++/*    */
+ #define R_028C7C_CB_COLOR0_CMASK                                        0x028C7C
+ #define R_028C80_CB_COLOR0_CMASK_SLICE                                  0x028C80
+ #define   S_028C80_TILE_MAX(x)                                        (((x) & 0x3FFF) << 0)
+@@ -8591,90 +9559,105 @@
+ #define   C_028C88_TILE_MAX                                           0xFFC00000
+ #define R_028C8C_CB_COLOR0_CLEAR_WORD0                                  0x028C8C
+ #define R_028C90_CB_COLOR0_CLEAR_WORD1                                  0x028C90
++#define R_028C94_CB_COLOR0_DCC_BASE                                     0x028C94 /* VI */
+ #define R_028C9C_CB_COLOR1_BASE                                         0x028C9C
+ #define R_028CA0_CB_COLOR1_PITCH                                        0x028CA0
+ #define R_028CA4_CB_COLOR1_SLICE                                        0x028CA4
+ #define R_028CA8_CB_COLOR1_VIEW                                         0x028CA8
+ #define R_028CAC_CB_COLOR1_INFO                                         0x028CAC
+ #define R_028CB0_CB_COLOR1_ATTRIB                                       0x028CB0
+-#define R_028CD4_CB_COLOR1_CMASK                                        0x028CB8
++#define R_028CB4_CB_COLOR1_DCC_CONTROL                                  0x028CB4 /* VI */
++#define R_028CB8_CB_COLOR1_CMASK                                        0x028CB8
+ #define R_028CBC_CB_COLOR1_CMASK_SLICE                                  0x028CBC
+ #define R_028CC0_CB_COLOR1_FMASK                                        0x028CC0
+ #define R_028CC4_CB_COLOR1_FMASK_SLICE                                  0x028CC4
+ #define R_028CC8_CB_COLOR1_CLEAR_WORD0                                  0x028CC8
+ #define R_028CCC_CB_COLOR1_CLEAR_WORD1                                  0x028CCC
++#define R_028CD0_CB_COLOR1_DCC_BASE                                     0x028CD0 /* VI */
+ #define R_028CD8_CB_COLOR2_BASE                                         0x028CD8
+ #define R_028CDC_CB_COLOR2_PITCH                                        0x028CDC
+ #define R_028CE0_CB_COLOR2_SLICE                                        0x028CE0
+ #define R_028CE4_CB_COLOR2_VIEW                                         0x028CE4
+ #define R_028CE8_CB_COLOR2_INFO                                         0x028CE8
+ #define R_028CEC_CB_COLOR2_ATTRIB                                       0x028CEC
++#define R_028CF0_CB_COLOR2_DCC_CONTROL                                  0x028CF0 /* VI */
+ #define R_028CF4_CB_COLOR2_CMASK                                        0x028CF4
+ #define R_028CF8_CB_COLOR2_CMASK_SLICE                                  0x028CF8
+ #define R_028CFC_CB_COLOR2_FMASK                                        0x028CFC
+ #define R_028D00_CB_COLOR2_FMASK_SLICE                                  0x028D00
+ #define R_028D04_CB_COLOR2_CLEAR_WORD0                                  0x028D04
+ #define R_028D08_CB_COLOR2_CLEAR_WORD1                                  0x028D08
++#define R_028D0C_CB_COLOR2_DCC_BASE                                     0x028D0C /* VI */
+ #define R_028D14_CB_COLOR3_BASE                                         0x028D14
+ #define R_028D18_CB_COLOR3_PITCH                                        0x028D18
+ #define R_028D1C_CB_COLOR3_SLICE                                        0x028D1C
+ #define R_028D20_CB_COLOR3_VIEW                                         0x028D20
+ #define R_028D24_CB_COLOR3_INFO                                         0x028D24
+ #define R_028D28_CB_COLOR3_ATTRIB                                       0x028D28
++#define R_028D2C_CB_COLOR3_DCC_CONTROL                                  0x028D2C /* VI */
+ #define R_028D30_CB_COLOR3_CMASK                                        0x028D30
+ #define R_028D34_CB_COLOR3_CMASK_SLICE                                  0x028D34
+ #define R_028D38_CB_COLOR3_FMASK                                        0x028D38
+ #define R_028D3C_CB_COLOR3_FMASK_SLICE                                  0x028D3C
+ #define R_028D40_CB_COLOR3_CLEAR_WORD0                                  0x028D40
+ #define R_028D44_CB_COLOR3_CLEAR_WORD1                                  0x028D44
++#define R_028D48_CB_COLOR3_DCC_BASE                                     0x028D48 /* VI */
+ #define R_028D50_CB_COLOR4_BASE                                         0x028D50
+ #define R_028D54_CB_COLOR4_PITCH                                        0x028D54
+ #define R_028D58_CB_COLOR4_SLICE                                        0x028D58
+ #define R_028D5C_CB_COLOR4_VIEW                                         0x028D5C
+ #define R_028D60_CB_COLOR4_INFO                                         0x028D60
+ #define R_028D64_CB_COLOR4_ATTRIB                                       0x028D64
++#define R_028D68_CB_COLOR4_DCC_CONTROL                                  0x028D68 /* VI */
+ #define R_028D6C_CB_COLOR4_CMASK                                        0x028D6C
+ #define R_028D70_CB_COLOR4_CMASK_SLICE                                  0x028D70
+ #define R_028D74_CB_COLOR4_FMASK                                        0x028D74
+ #define R_028D78_CB_COLOR4_FMASK_SLICE                                  0x028D78
+ #define R_028D7C_CB_COLOR4_CLEAR_WORD0                                  0x028D7C
+ #define R_028D80_CB_COLOR4_CLEAR_WORD1                                  0x028D80
++#define R_028D84_CB_COLOR4_DCC_BASE                                     0x028D84 /* VI */
+ #define R_028D8C_CB_COLOR5_BASE                                         0x028D8C
+ #define R_028D90_CB_COLOR5_PITCH                                        0x028D90
+ #define R_028D94_CB_COLOR5_SLICE                                        0x028D94
+ #define R_028D98_CB_COLOR5_VIEW                                         0x028D98
+ #define R_028D9C_CB_COLOR5_INFO                                         0x028D9C
+ #define R_028DA0_CB_COLOR5_ATTRIB                                       0x028DA0
++#define R_028DA4_CB_COLOR5_DCC_CONTROL                                  0x028DA4 /* VI */
+ #define R_028DA8_CB_COLOR5_CMASK                                        0x028DA8
+ #define R_028DAC_CB_COLOR5_CMASK_SLICE                                  0x028DAC
+ #define R_028DB0_CB_COLOR5_FMASK                                        0x028DB0
+ #define R_028DB4_CB_COLOR5_FMASK_SLICE                                  0x028DB4
+ #define R_028DB8_CB_COLOR5_CLEAR_WORD0                                  0x028DB8
+ #define R_028DBC_CB_COLOR5_CLEAR_WORD1                                  0x028DBC
++#define R_028DC0_CB_COLOR5_DCC_BASE                                     0x028DC0 /* VI */
+ #define R_028DC8_CB_COLOR6_BASE                                         0x028DC8
+ #define R_028DCC_CB_COLOR6_PITCH                                        0x028DCC
+ #define R_028DD0_CB_COLOR6_SLICE                                        0x028DD0
+ #define R_028DD4_CB_COLOR6_VIEW                                         0x028DD4
+ #define R_028DD8_CB_COLOR6_INFO                                         0x028DD8
+ #define R_028DDC_CB_COLOR6_ATTRIB                                       0x028DDC
++#define R_028DE0_CB_COLOR6_DCC_CONTROL                                  0x028DE0 /* VI */
+ #define R_028DE4_CB_COLOR6_CMASK                                        0x028DE4
+ #define R_028DE8_CB_COLOR6_CMASK_SLICE                                  0x028DE8
+ #define R_028DEC_CB_COLOR6_FMASK                                        0x028DEC
+ #define R_028DF0_CB_COLOR6_FMASK_SLICE                                  0x028DF0
+ #define R_028DF4_CB_COLOR6_CLEAR_WORD0                                  0x028DF4
+ #define R_028DF8_CB_COLOR6_CLEAR_WORD1                                  0x028DF8
++#define R_028DFC_CB_COLOR6_DCC_BASE                                     0x028DFC /* VI */
+ #define R_028E04_CB_COLOR7_BASE                                         0x028E04
+ #define R_028E08_CB_COLOR7_PITCH                                        0x028E08
+ #define R_028E0C_CB_COLOR7_SLICE                                        0x028E0C
+ #define R_028E10_CB_COLOR7_VIEW                                         0x028E10
+ #define R_028E14_CB_COLOR7_INFO                                         0x028E14
+ #define R_028E18_CB_COLOR7_ATTRIB                                       0x028E18
++#define R_028E1C_CB_COLOR7_DCC_CONTROL                                  0x028E1C /* VI */
+ #define R_028E20_CB_COLOR7_CMASK                                        0x028E20
+ #define R_028E24_CB_COLOR7_CMASK_SLICE                                  0x028E24
+ #define R_028E28_CB_COLOR7_FMASK                                        0x028E28
+ #define R_028E2C_CB_COLOR7_FMASK_SLICE                                  0x028E2C
+ #define R_028E30_CB_COLOR7_CLEAR_WORD0                                  0x028E30
+ #define R_028E34_CB_COLOR7_CLEAR_WORD1                                  0x028E34
++#define R_028E38_CB_COLOR7_DCC_BASE                                     0x028E38 /* VI */
+ 
+ /* SI async DMA packets */
+ #define SI_DMA_PACKET(cmd, sub_cmd, n) ((((cmd) & 0xF) << 28) |    \
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch
new file mode 100644
index 00000000..60b858b2
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch
@@ -0,0 +1,410 @@
+From 5de6f83f9ce743f5a2ae600bb5cb2eba48a54d3f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 20:44:54 +0200
+Subject: [PATCH 12/29] radeonsi: add VI hardware support
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/r600_pipe_common.c   |  6 ++++
+ src/gallium/drivers/radeon/r600_pipe_common.h   |  1 +
+ src/gallium/drivers/radeonsi/si_descriptors.c   | 15 +++++++--
+ src/gallium/drivers/radeonsi/si_pipe.c          |  5 ++-
+ src/gallium/drivers/radeonsi/si_shader.c        | 24 +++++++++++--
+ src/gallium/drivers/radeonsi/si_state.c         | 45 +++++++++++++++++++++----
+ src/gallium/drivers/radeonsi/si_state_draw.c    | 41 ++++++++++++++++------
+ src/gallium/drivers/radeonsi/si_state_shaders.c |  8 ++++-
+ 8 files changed, 121 insertions(+), 24 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
+index 3b26dea..80a32ca 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.c
++++ b/src/gallium/drivers/radeon/r600_pipe_common.c
+@@ -379,6 +379,9 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
+ 	case CHIP_KABINI: return "AMD KABINI";
+ 	case CHIP_HAWAII: return "AMD HAWAII";
+ 	case CHIP_MULLINS: return "AMD MULLINS";
++	case CHIP_TONGA: return "AMD TONGA";
++	case CHIP_ICELAND: return "AMD ICELAND";
++	case CHIP_CARRIZO: return "AMD CARRIZO";
+ 	default: return "AMD unknown";
+ 	}
+ }
+@@ -496,6 +499,9 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
+ #else
+ 		return "kabini";
+ #endif
++	case CHIP_TONGA: return "tonga";
++	case CHIP_ICELAND: return "iceland";
++	case CHIP_CARRIZO: return "carrizo";
+ 	default: return "";
+ 	}
+ }
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
+index bdb4541..8944059 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.h
++++ b/src/gallium/drivers/radeon/r600_pipe_common.h
+@@ -232,6 +232,7 @@ struct r600_surface {
+ 	unsigned cb_color_pitch;	/* EG and later */
+ 	unsigned cb_color_slice;	/* EG and later */
+ 	unsigned cb_color_attrib;	/* EG and later */
++	unsigned cb_dcc_control;	/* VI and later */
+ 	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
+ 	unsigned cb_color_fmask_slice;	/* EG and later */
+ 	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
+diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
+index bbfd36d..88a507d 100644
+--- a/src/gallium/drivers/radeonsi/si_descriptors.c
++++ b/src/gallium/drivers/radeonsi/si_descriptors.c
+@@ -675,7 +675,8 @@ void si_update_vertex_buffers(struct si_context *sctx)
+ 		desc[0] = va & 0xFFFFFFFF;
+ 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+ 			  S_008F04_STRIDE(vb->stride);
+-		if (vb->stride)
++
++		if (sctx->b.chip_class <= CIK && vb->stride)
+ 			/* Round up by rounding down and adding 1 */
+ 			desc[2] = (vb->buffer->width0 - offset -
+ 				   sctx->vertex_elements->format_size[i]) /
+@@ -839,6 +840,9 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
+ 			break;
+ 		}
+ 
++		if (sctx->b.chip_class >= VI && stride)
++			num_records *= stride;
++
+ 		/* Set the descriptor. */
+ 		uint32_t *desc = buffers->desc_data[slot];
+ 		desc[0] = va;
+@@ -929,7 +933,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
+ 			struct pipe_resource *buffer = targets[i]->buffer;
+ 			uint64_t va = r600_resource(buffer)->gpu_address;
+ 
+-			/* Set the descriptor. */
++			/* Set the descriptor.
++			 *
++			 * On VI, the format must be non-INVALID, otherwise
++			 * the buffer will be considered not bound and store
++			 * instructions will be no-ops. */
+ 			uint32_t *desc = buffers->desc_data[bufidx];
+ 			desc[0] = va;
+ 			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+@@ -937,7 +945,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
+ 			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ 				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ 				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+-				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
++				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
++				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ 
+ 			/* Set the resource. */
+ 			pipe_resource_reference(&buffers->buffers[bufidx],
+diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
+index 91b4d6f..0bb7a35 100644
+--- a/src/gallium/drivers/radeonsi/si_pipe.c
++++ b/src/gallium/drivers/radeonsi/si_pipe.c
+@@ -130,6 +130,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
+ 	switch (sctx->b.chip_class) {
+ 	case SI:
+ 	case CIK:
++	case VI:
+ 		si_init_state_functions(sctx);
+ 		si_init_shader_functions(sctx);
+ 		si_init_config(sctx);
+@@ -181,7 +182,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
+ 	r600_target = radeon_llvm_get_r600_target(triple);
+ 	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
+ 					   r600_get_llvm_processor_name(sscreen->b.family),
+-					   "+DumpCode,+vgpr-spilling",
++					   sctx->b.chip_class >= VI ?
++						   "+DumpCode" :
++						   "+DumpCode,+vgpr-spilling",
+ 					   LLVMCodeGenLevelDefault,
+ 					   LLVMRelocDefault,
+ 					   LLVMCodeModelDefault);
+diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
+index 89f02ab..9b9a9aa 100644
+--- a/src/gallium/drivers/radeonsi/si_shader.c
++++ b/src/gallium/drivers/radeonsi/si_shader.c
+@@ -2060,6 +2060,7 @@ static void txq_fetch_args(
+ 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ 	const struct tgsi_full_instruction *inst = emit_data->inst;
+ 	struct gallivm_state *gallivm = bld_base->base.gallivm;
++	LLVMBuilderRef builder = gallivm->builder;
+ 	unsigned target = inst->Texture.Texture;
+ 
+ 	if (target == TGSI_TEXTURE_BUFFER) {
+@@ -2067,10 +2068,27 @@ static void txq_fetch_args(
+ 		LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+ 
+ 		/* Read the size from the buffer descriptor directly. */
+-		LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
+-		size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
+-		size = LLVMBuildExtractElement(gallivm->builder, size,
++		LLVMValueRef res = si_shader_ctx->resources[inst->Src[1].Register.Index];
++		res = LLVMBuildBitCast(builder, res, v8i32, "");
++		LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+ 					      lp_build_const_int32(gallivm, 6), "");
++
++		if (si_shader_ctx->screen->b.chip_class >= VI) {
++			/* On VI, the descriptor contains the size in bytes,
++			 * but TXQ must return the size in elements.
++			 * The stride is always non-zero for resources using TXQ.
++			 */
++			LLVMValueRef stride =
++				LLVMBuildExtractElement(builder, res,
++							lp_build_const_int32(gallivm, 5), "");
++			stride = LLVMBuildLShr(builder, stride,
++					       lp_build_const_int32(gallivm, 16), "");
++			stride = LLVMBuildAnd(builder, stride,
++					      lp_build_const_int32(gallivm, 0x3FFF), "");
++
++			size = LLVMBuildUDiv(builder, size, stride, "");
++		}
++
+ 		emit_data->args[0] = size;
+ 		return;
+ 	}
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index 6a0b093..b35fbd5 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -46,7 +46,7 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
+ 
+ uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
+ {
+-	if (sscreen->b.chip_class == CIK &&
++	if (sscreen->b.chip_class >= CIK &&
+ 	    sscreen->b.info.cik_macrotile_mode_array_valid) {
+ 		unsigned index, tileb;
+ 
+@@ -1815,6 +1815,9 @@ static void si_initialize_color_surface(struct si_context *sctx,
+ 	surf->cb_color_info = color_info;
+ 	surf->cb_color_attrib = color_attrib;
+ 
++	if (sctx->b.chip_class >= VI)
++		surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
++
+ 	if (rtex->fmask.size) {
+ 		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
+ 		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
+@@ -1960,6 +1963,10 @@ static void si_init_depth_surface(struct si_context *sctx,
+ 		db_htile_surface = 0;
+ 	}
+ 
++	/* Bug workaround. */
++	if (sctx->b.chip_class >= VI)
++		s_info |= S_028044_TILE_STENCIL_DISABLE(1);
++
+ 	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
+ 
+ 	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
+@@ -2051,7 +2058,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
+ 	si_update_fb_rs_state(sctx);
+ 	si_update_fb_blend_state(sctx);
+ 
+-	sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
++	sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
+ 	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
+ 	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
+ 	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
+@@ -2130,20 +2137,24 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
+ 				RADEON_PRIO_COLOR_META);
+ 		}
+ 
+-		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
++		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
++					   sctx->b.chip_class >= VI ? 14 : 13);
+ 		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
+ 		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
+ 		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
+ 		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
+ 		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
+ 		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
+-		radeon_emit(cs, 0);			/* R_028C78 unused */
++		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
+ 		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
+ 		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
+ 		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
+ 		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
+ 		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
+ 		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
++
++		if (sctx->b.chip_class >= VI)
++			radeon_emit(cs, 0);	/* R_028C94_CB_COLOR0_DCC_BASE */
+ 	}
+ 	/* set CB_COLOR1_INFO for possible dual-src blending */
+ 	if (i == 1 && state->cbufs[0]) {
+@@ -2286,7 +2297,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
+ 
+ 	/* Buffer resource. */
+ 	if (texture->target == PIPE_BUFFER) {
+-		unsigned stride;
++		unsigned stride, num_records;
+ 
+ 		desc = util_format_description(state->format);
+ 		first_non_void = util_format_get_first_non_void_channel(state->format);
+@@ -2295,10 +2306,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
+ 		format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
+ 		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
+ 
++		num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
++		num_records = MIN2(num_records, texture->width0 / stride);
++
++		if (sctx->b.chip_class >= VI)
++			num_records *= stride;
++
+ 		view->state[4] = va;
+ 		view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+ 				 S_008F04_STRIDE(stride);
+-		view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
++		view->state[6] = num_records;
+ 		view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
+ 				 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
+ 				 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
+@@ -3070,6 +3087,15 @@ void si_init_config(struct si_context *sctx)
+ 			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
+ 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
+ 			break;
++		case CHIP_TONGA:
++			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
++			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002a);
++			break;
++		case CHIP_ICELAND:
++		case CHIP_CARRIZO:
++			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002);
++			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
++			break;
+ 		case CHIP_KAVERI:
+ 			/* XXX todo */
+ 		case CHIP_KABINI:
+@@ -3162,5 +3188,12 @@ void si_init_config(struct si_context *sctx)
+ 		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+ 	}
+ 
++	if (sctx->b.chip_class >= VI) {
++		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
++			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
++		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
++		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
++	}
++
+ 	sctx->init_config = pm4;
+ }
+diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
+index 2e77d85..2d38b20 100644
+--- a/src/gallium/drivers/radeonsi/si_state_draw.c
++++ b/src/gallium/drivers/radeonsi/si_state_draw.c
+@@ -146,7 +146,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
+ 	return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
+ 		S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+ 		S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
+-		S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0);
++		S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
++		S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+ }
+ 
+ static void si_emit_scratch_reloc(struct si_context *sctx)
+@@ -275,12 +276,24 @@ static void si_emit_draw_packets(struct si_context *sctx,
+ 	if (info->indexed) {
+ 		radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
+ 
+-		if (ib->index_size == 4) {
+-			radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ?
+-					V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+-		} else {
+-			radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ?
+-					V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
++		/* index type */
++		switch (ib->index_size) {
++		case 1:
++			radeon_emit(cs, V_028A7C_VGT_INDEX_8);
++			break;
++		case 2:
++			radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
++				    (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
++					     V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
++			break;
++		case 4:
++			radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
++				    (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
++					     V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
++			break;
++		default:
++			assert(!"unreachable");
++			return;
+ 		}
+ 	}
+ 
+@@ -406,9 +419,14 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
+ 
+ 	if (sctx->flags & SI_CONTEXT_INV_TC_L1)
+ 		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
+-	if (sctx->flags & SI_CONTEXT_INV_TC_L2)
++	if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
+ 		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+ 
++		/* TODO: this might not be needed. */
++		if (sctx->chip_class >= VI)
++			cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
++	}
++
+ 	if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
+ 		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
+ 				 S_0085F0_CB0_DEST_BASE_ENA(1) |
+@@ -550,7 +568,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+ 		ib.offset = sctx->index_buffer.offset;
+ 
+ 		/* Translate or upload, if needed. */
+-		if (ib.index_size == 1) {
++		/* 8-bit indices are supported on VI. */
++		if (sctx->b.chip_class <= CIK && ib.index_size == 1) {
+ 			struct pipe_resource *out_buffer = NULL;
+ 			unsigned out_offset, start, count, start_offset;
+ 			void *ptr;
+@@ -585,6 +604,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+ 		}
+ 	}
+ 
++	/* TODO: VI should read index buffers through TC, so this shouldn't be
++	 * needed on VI. */
+ 	if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
+ 		sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
+ 		r600_resource(ib.buffer)->TC_L2_dirty = false;
+@@ -618,7 +639,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+ 
+ 	/* Workaround for a VGT hang when streamout is enabled.
+ 	 * It must be done after drawing. */
+-	if (sctx->b.family == CHIP_HAWAII &&
++	if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
+ 	    (sctx->b.streamout.streamout_enabled ||
+ 	     sctx->b.streamout.prims_gen_query_enabled)) {
+ 		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
+diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
+index 1bbc6b3..09ca792 100644
+--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
++++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
+@@ -700,7 +700,7 @@ bcolor:
+ static void si_init_gs_rings(struct si_context *sctx)
+ {
+ 	unsigned esgs_ring_size = 128 * 1024;
+-	unsigned gsvs_ring_size = 64 * 1024 * 1024;
++	unsigned gsvs_ring_size = 60 * 1024 * 1024;
+ 
+ 	assert(!sctx->gs_rings);
+ 	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
+@@ -712,6 +712,12 @@ static void si_init_gs_rings(struct si_context *sctx)
+ 					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
+ 
+ 	if (sctx->b.chip_class >= CIK) {
++		if (sctx->b.chip_class >= VI) {
++			/* The maximum sizes are 63.999 MB on VI, because
++			 * the register fields only have 18 bits. */
++			assert(esgs_ring_size / 256 < (1 << 18));
++			assert(gsvs_ring_size / 256 < (1 << 18));
++		}
+ 		si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
+ 			       esgs_ring_size / 256);
+ 		si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0013-radeonsi-add-a-temporary-workaround-for-a-shader-bug.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0013-radeonsi-add-a-temporary-workaround-for-a-shader-bug.patch
new file mode 100644
index 00000000..7180ff45
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0013-radeonsi-add-a-temporary-workaround-for-a-shader-bug.patch
@@ -0,0 +1,163 @@
+From 5c6a9bdcff5a32d31433896f34078e7a81ca3604 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 22:33:04 +0200
+Subject: [PATCH 13/29] radeonsi: add a temporary workaround for a shader bug
+
+This will be reverted after the corresponding LLVM fix is cherry-picked and
+released as part of LLVM 3.6.
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeonsi/si_state_shaders.c | 47 ++++++++++++++++---------
+ 1 file changed, 31 insertions(+), 16 deletions(-)
+
+diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
+index 09ca792..5833ee1 100644
+--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
++++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
+@@ -33,7 +33,19 @@
+ #include "util/u_memory.h"
+ #include "util/u_simple_shaders.h"
+ 
+-static void si_shader_es(struct si_shader *shader)
++/* TODO: the compiler has a better workaround, remove this */
++static unsigned si_adjust_num_sgprs(struct si_screen *sscreen, unsigned num_sgprs)
++{
++	assert(num_sgprs <= 104);
++
++	if (sscreen->b.family == CHIP_TONGA ||
++	    sscreen->b.family == CHIP_ICELAND)
++		num_sgprs = 104;
++
++	return num_sgprs;
++}
++
++static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
+ {
+ 	struct si_pm4_state *pm4;
+ 	unsigned num_sgprs, num_user_sgprs;
+@@ -57,7 +69,7 @@ static void si_shader_es(struct si_shader *shader)
+ 		/* Last 2 reserved SGPRs are used for VCC */
+ 		num_sgprs = num_user_sgprs + 1 + 2;
+ 	}
+-	assert(num_sgprs <= 104);
++	num_sgprs = si_adjust_num_sgprs(sscreen, num_sgprs);
+ 
+ 	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+ 	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
+@@ -71,7 +83,7 @@ static void si_shader_es(struct si_shader *shader)
+ 		       S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ }
+ 
+-static void si_shader_gs(struct si_shader *shader)
++static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
+ {
+ 	unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
+ 	unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
+@@ -130,7 +142,7 @@ static void si_shader_gs(struct si_shader *shader)
+ 		/* Last 2 reserved SGPRs are used for VCC */
+ 		num_sgprs = num_user_sgprs + 2 + 2;
+ 	}
+-	assert(num_sgprs <= 104);
++	num_sgprs = si_adjust_num_sgprs(sscreen, num_sgprs);
+ 
+ 	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
+ 		       S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
+@@ -141,7 +153,7 @@ static void si_shader_gs(struct si_shader *shader)
+ 		       S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ }
+ 
+-static void si_shader_vs(struct si_shader *shader)
++static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader)
+ {
+ 	struct tgsi_shader_info *info = &shader->selector->info;
+ 	struct si_pm4_state *pm4;
+@@ -173,7 +185,7 @@ static void si_shader_vs(struct si_shader *shader)
+ 		/* Last 2 reserved SGPRs are used for VCC */
+ 		num_sgprs = num_user_sgprs + 2;
+ 	}
+-	assert(num_sgprs <= 104);
++	num_sgprs = si_adjust_num_sgprs(sscreen, num_sgprs);
+ 
+ 	/* Certain attributes (position, psize, etc.) don't count as params.
+ 	 * VS is required to export at least one param and r600_shader_from_tgsi()
+@@ -233,7 +245,7 @@ static void si_shader_vs(struct si_shader *shader)
+ 			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+ }
+ 
+-static void si_shader_ps(struct si_shader *shader)
++static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
+ {
+ 	struct tgsi_shader_info *info = &shader->selector->info;
+ 	struct si_pm4_state *pm4;
+@@ -308,7 +320,7 @@ static void si_shader_ps(struct si_shader *shader)
+ 		/* Last 2 reserved SGPRs are used for VCC */
+ 		num_sgprs = num_user_sgprs + 1 + 2;
+ 	}
+-	assert(num_sgprs <= 104);
++	num_sgprs = si_adjust_num_sgprs(sscreen, num_sgprs);
+ 
+ 	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
+ 		       S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
+@@ -320,7 +332,8 @@ static void si_shader_ps(struct si_shader *shader)
+ 		       S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+ }
+ 
+-static void si_shader_init_pm4_state(struct si_shader *shader)
++static void si_shader_init_pm4_state(struct si_screen *sscreen,
++				     struct si_shader *shader)
+ {
+ 
+ 	if (shader->pm4)
+@@ -329,16 +342,16 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
+ 	switch (shader->selector->type) {
+ 	case PIPE_SHADER_VERTEX:
+ 		if (shader->key.vs.as_es)
+-			si_shader_es(shader);
++			si_shader_es(sscreen, shader);
+ 		else
+-			si_shader_vs(shader);
++			si_shader_vs(sscreen, shader);
+ 		break;
+ 	case PIPE_SHADER_GEOMETRY:
+-		si_shader_gs(shader);
+-		si_shader_vs(shader->gs_copy_shader);
++		si_shader_gs(sscreen, shader);
++		si_shader_vs(sscreen, shader->gs_copy_shader);
+ 		break;
+ 	case PIPE_SHADER_FRAGMENT:
+-		si_shader_ps(shader);
++		si_shader_ps(sscreen, shader);
+ 		break;
+ 	default:
+ 		assert(0);
+@@ -439,6 +452,8 @@ static int si_shader_select(struct pipe_context *ctx,
+ 		shader->next_variant = sel->current;
+ 		sel->current = shader;
+ 	} else {
++		struct si_screen *sscreen = (struct si_screen*)ctx->screen;
++
+ 		shader = CALLOC(1, sizeof(struct si_shader));
+ 		shader->selector = sel;
+ 		shader->key = key;
+@@ -454,7 +469,7 @@ static int si_shader_select(struct pipe_context *ctx,
+ 			FREE(shader);
+ 			return r;
+ 		}
+-		si_shader_init_pm4_state(shader);
++		si_shader_init_pm4_state(sscreen, shader);
+ 		sel->num_shaders++;
+ 	}
+ 
+@@ -778,7 +793,7 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
+ 	sctx->screen->b.ws->buffer_unmap(shader->bo->cs_buf);
+ 
+ 	/* Update the shader state to use the new shader bo. */
+-	si_shader_init_pm4_state(shader);
++	si_shader_init_pm4_state(sctx->screen, shader);
+ 
+ 	r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
+ 
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0014-gallium-radeon-use-VM-for-UVD.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0014-gallium-radeon-use-VM-for-UVD.patch
new file mode 100644
index 00000000..889eb023
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0014-gallium-radeon-use-VM-for-UVD.patch
@@ -0,0 +1,64 @@
+From cbd8c552976aeebe913749c511c6656d84cd23a4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Wed, 9 Apr 2014 19:41:06 +0200
+Subject: [PATCH 14/29] gallium/radeon: use VM for UVD
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+v2: (leo) add checking for driver backend
+v3: (leo) change variable name from use_amdgpu to use_vm
+v4: rebase by Marek
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_uvd.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
+index be58d0b..ac850a2 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.c
++++ b/src/gallium/drivers/radeon/radeon_uvd.c
+@@ -82,6 +82,7 @@ struct ruvd_decoder {
+ 	unsigned			bs_size;
+ 
+ 	struct rvid_buffer		dpb;
++	bool				use_legacy;
+ };
+ 
+ /* flush IB to the hardware */
+@@ -107,8 +108,16 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
+ 
+ 	reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
+ 					  RADEON_PRIO_MIN);
+-	set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
+-	set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
++	if (!dec->use_legacy) {
++		uint64_t addr;
++		addr = dec->ws->buffer_get_virtual_address(cs_buf);
++		addr = addr + off;
++		set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
++		set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
++	} else {
++		set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
++		set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
++	}
+ 	set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
+ }
+ 
+@@ -791,6 +800,9 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 	if (!dec)
+ 		return NULL;
+ 
++	if (info.drm_major < 3)
++		dec->use_legacy = TRUE;
++
+ 	dec->base = *templ;
+ 	dec->base.context = context;
+ 	dec->base.width = width;
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0015-gallium-radeon-use-VM-for-VCE.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0015-gallium-radeon-use-VM-for-VCE.patch
new file mode 100644
index 00000000..6524c25a
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0015-gallium-radeon-use-VM-for-VCE.patch
@@ -0,0 +1,151 @@
+From 36b6118dfeae432644cb6fbba249319976c35dfa Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Thu, 10 Apr 2014 17:18:32 +0200
+Subject: [PATCH 15/29] gallium/radeon: use VM for VCE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+v2: (leo) add checking for driver backend
+v3: (leo) change variable name from use_amdgpu to use_vm
+v4: rebase by Marek
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_vce.c        | 24 ++++++++++++++++++++++++
+ src/gallium/drivers/radeon/radeon_vce.h        | 15 +++++++++------
+ src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 17 +++++++----------
+ 3 files changed, 40 insertions(+), 16 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
+index e220f40..d1495a2 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.c
++++ b/src/gallium/drivers/radeon/radeon_vce.c
+@@ -353,6 +353,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ 	if (!enc)
+ 		return NULL;
+ 
++	if (rscreen->info.drm_major == 3)
++		enc->use_vm = true;
+ 	if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
+ 		enc->use_vui = true;
+ 
+@@ -428,3 +430,25 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
+ {
+ 	return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8));
+ }
++
++/**
++ * Add the buffer as relocation to the current command submission
++ */
++void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
++                     enum radeon_bo_usage usage, enum radeon_bo_domain domain,
++                     uint32_t offset)
++{
++	int reloc_idx;
++
++	reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN);
++	if (enc->use_vm) {
++		uint64_t addr;
++		addr = enc->ws->buffer_get_virtual_address(buf);
++		addr = addr + offset;
++		RVCE_CS(addr >> 32);
++		RVCE_CS(addr);
++	} else {
++		RVCE_CS(reloc_idx * 4);
++		RVCE_CS(offset);
++	}
++}
+diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
+index 5c6317a..67ba333 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.h
++++ b/src/gallium/drivers/radeon/radeon_vce.h
+@@ -36,13 +36,11 @@
+ 
+ #include "util/u_double_list.h"
+ 
+-#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN))
+-
+ #define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
+ #define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
+-#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4)
+-#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4)
+-#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4)
++#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
++#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
++#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+ #define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
+ 
+ struct r600_common_screen;
+@@ -101,7 +99,8 @@ struct rvce_encoder {
+ 	struct rvid_buffer		*fb;
+ 	struct rvid_buffer		cpb;
+ 	struct pipe_h264_enc_picture_desc pic;
+-	bool use_vui;
++	bool				use_vm;
++	bool				use_vui;
+ };
+ 
+ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+@@ -111,6 +110,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ 
+ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
+ 
++void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
++		     enum radeon_bo_usage usage, enum radeon_bo_domain domain,
++		     uint32_t offset);
++
+ /* init vce fw 40.2.2 specific callbacks */
+ void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
+ 
+diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+index 0902957..1e7f278 100644
+--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
++++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+@@ -94,8 +94,7 @@ static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
+ static void feedback(struct rvce_encoder *enc)
+ {
+ 	RVCE_BEGIN(0x05000005); // feedback buffer
+-	RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
+-	RVCE_CS(0x00000000); // feedbackRingAddressLo
++	RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo
+ 	RVCE_CS(0x00000001); // feedbackRingSize
+ 	RVCE_END();
+ }
+@@ -306,13 +305,11 @@ static void encode(struct rvce_encoder *enc)
+ 	task_info(enc, 0x00000003);
+ 
+ 	RVCE_BEGIN(0x05000001); // context buffer
+-	RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
+-	RVCE_CS(0x00000000); // encodeContextAddressLo
++	RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo
+ 	RVCE_END();
+ 
+ 	RVCE_BEGIN(0x05000004); // video bitstream buffer
+-	RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
+-	RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
++	RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo
+ 	RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+ 	RVCE_END();
+ 
+@@ -324,10 +321,10 @@ static void encode(struct rvce_encoder *enc)
+ 	RVCE_CS(0x00000000); // insertAUD
+ 	RVCE_CS(0x00000000); // endOfSequence
+ 	RVCE_CS(0x00000000); // endOfStream
+-	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
+-	RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
+-	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
+-	RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
++	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
++		  enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
++	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
++		  enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
+ 	RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+ 	RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+ 	RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0016-gallium-radeon-add-h264-performance-HW-decoder-suppo.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0016-gallium-radeon-add-h264-performance-HW-decoder-suppo.patch
new file mode 100644
index 00000000..09468be9
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0016-gallium-radeon-add-h264-performance-HW-decoder-suppo.patch
@@ -0,0 +1,267 @@
+From 97d04773e92c30cef789463e939c722e5f19438c Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Mon, 15 Dec 2014 12:51:50 -0500
+Subject: [PATCH 16/29] gallium/radeon: add h264 performance HW decoder support
+
+v2: -make tonga use new h264 performance HW decoder;
+    -integrate the IT scaling table buffer into the msg_fb buffer
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_uvd.c | 63 +++++++++++++++++++++------------
+ src/gallium/drivers/radeon/radeon_uvd.h |  2 ++
+ 2 files changed, 43 insertions(+), 22 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
+index ac850a2..b0bfb3b 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.c
++++ b/src/gallium/drivers/radeon/radeon_uvd.c
+@@ -57,6 +57,7 @@
+ 
+ #define FB_BUFFER_OFFSET 0x1000
+ #define FB_BUFFER_SIZE 2048
++#define IT_SCALING_TABLE_SIZE 224
+ 
+ /* UVD decoder representation */
+ struct ruvd_decoder {
+@@ -65,6 +66,7 @@ struct ruvd_decoder {
+ 	ruvd_set_dtb			set_dtb;
+ 
+ 	unsigned			stream_handle;
++	unsigned			stream_type;
+ 	unsigned			frame_number;
+ 
+ 	struct pipe_screen		*screen;
+@@ -73,9 +75,10 @@ struct ruvd_decoder {
+ 
+ 	unsigned			cur_buffer;
+ 
+-	struct rvid_buffer		msg_fb_buffers[NUM_BUFFERS];
++	struct rvid_buffer		msg_fb_it_buffers[NUM_BUFFERS];
+ 	struct ruvd_msg			*msg;
+ 	uint32_t			*fb;
++	uint8_t				*it;
+ 
+ 	struct rvid_buffer		bs_buffers[NUM_BUFFERS];
+ 	void*				bs_ptr;
+@@ -121,14 +124,14 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
+ 	set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
+ }
+ 
+-/* map the next available message/feedback buffer */
+-static void map_msg_fb_buf(struct ruvd_decoder *dec)
++/* map the next available message/feedback/itscaling buffer */
++static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
+ {
+ 	struct rvid_buffer* buf;
+ 	uint8_t *ptr;
+ 
+ 	/* grab the current message/feedback buffer */
+-	buf = &dec->msg_fb_buffers[dec->cur_buffer];
++	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+ 
+ 	/* and map it for CPU access */
+ 	ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
+@@ -136,6 +139,8 @@ static void map_msg_fb_buf(struct ruvd_decoder *dec)
+ 	/* calc buffer offsets */
+ 	dec->msg = (struct ruvd_msg *)ptr;
+ 	dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
++	if (dec->stream_type == RUVD_CODEC_H264_PERF)
++		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
+ }
+ 
+ /* unmap and send a message command to the VCPU */
+@@ -148,12 +153,14 @@ static void send_msg_buf(struct ruvd_decoder *dec)
+ 		return;
+ 
+ 	/* grab the current message buffer */
+-	buf = &dec->msg_fb_buffers[dec->cur_buffer];
++	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+ 
+ 	/* unmap the buffer */
+ 	dec->ws->buffer_unmap(buf->res->cs_buf);
+ 	dec->msg = NULL;
+ 	dec->fb = NULL;
++	if (dec->stream_type == RUVD_CODEC_H264_PERF)
++		dec->it = NULL;
+ 
+ 	/* and send it to the hardware */
+ 	send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
+@@ -168,11 +175,12 @@ static void next_buffer(struct ruvd_decoder *dec)
+ }
+ 
+ /* convert the profile into something UVD understands */
+-static uint32_t profile2stream_type(enum pipe_video_profile profile)
++static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
+ {
+-	switch (u_reduce_video_profile(profile)) {
++	switch (u_reduce_video_profile(dec->base.profile)) {
+ 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+-		return RUVD_CODEC_H264;
++		return (family >= CHIP_TONGA) ?
++			RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;
+ 
+ 	case PIPE_VIDEO_FORMAT_VC1:
+ 		return RUVD_CODEC_VC1;
+@@ -565,7 +573,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
+ 
+ 	assert(decoder);
+ 
+-	map_msg_fb_buf(dec);
++	map_msg_fb_it_buf(dec);
+ 	memset(dec->msg, 0, sizeof(*dec->msg));
+ 	dec->msg->size = sizeof(*dec->msg);
+ 	dec->msg->msg_type = RUVD_MSG_DESTROY;
+@@ -577,7 +585,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
+ 	dec->ws->cs_destroy(dec->cs);
+ 
+ 	for (i = 0; i < NUM_BUFFERS; ++i) {
+-		rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
++		rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ 		rvid_destroy_buffer(&dec->bs_buffers[i]);
+ 	}
+ 
+@@ -679,7 +687,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
+ {
+ 	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+ 	struct radeon_winsys_cs_handle *dt;
+-	struct rvid_buffer *msg_fb_buf, *bs_buf;
++	struct rvid_buffer *msg_fb_it_buf, *bs_buf;
+ 	unsigned bs_size;
+ 
+ 	assert(decoder);
+@@ -687,32 +695,37 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
+ 	if (!dec->bs_ptr)
+ 		return;
+ 
+-	msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
++	msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+ 	bs_buf = &dec->bs_buffers[dec->cur_buffer];
+ 
+ 	bs_size = align(dec->bs_size, 128);
+ 	memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
+ 	dec->ws->buffer_unmap(bs_buf->res->cs_buf);
+ 
+-	map_msg_fb_buf(dec);
++	map_msg_fb_it_buf(dec);
+ 	dec->msg->size = sizeof(*dec->msg);
+ 	dec->msg->msg_type = RUVD_MSG_DECODE;
+ 	dec->msg->stream_handle = dec->stream_handle;
+ 	dec->msg->status_report_feedback_number = dec->frame_number;
+ 
+-	dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
++	dec->msg->body.decode.stream_type = dec->stream_type;
+ 	dec->msg->body.decode.decode_flags = 0x1;
+ 	dec->msg->body.decode.width_in_samples = dec->base.width;
+ 	dec->msg->body.decode.height_in_samples = dec->base.height;
+ 
+ 	dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
+ 	dec->msg->body.decode.bsd_size = bs_size;
++	dec->msg->body.decode.db_pitch = dec->base.width;
+ 
+ 	dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
+ 
+ 	switch (u_reduce_video_profile(picture->profile)) {
+ 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+ 		dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
++		if (dec->stream_type == RUVD_CODEC_H264_PERF) {
++			memcpy(dec->it, dec->msg->body.decode.codec.h264.scaling_list_4x4, 6*16);
++			memcpy((dec->it + 96), dec->msg->body.decode.codec.h264.scaling_list_8x8, 2*64);
++		}
+ 		break;
+ 
+ 	case PIPE_VIDEO_FORMAT_VC1:
+@@ -746,8 +759,11 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
+ 		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+ 	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
+ 		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
+-	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
++	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf,
+ 		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
++	if (dec->stream_type == RUVD_CODEC_H264_PERF)
++		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf,
++			 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+ 	set_reg(dec, RUVD_ENGINE_CNTL, 1);
+ 
+ 	flush(dec);
+@@ -815,6 +831,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 	dec->base.end_frame = ruvd_end_frame;
+ 	dec->base.flush = ruvd_flush;
+ 
++	dec->stream_type = profile2stream_type(dec, info.family);
+ 	dec->set_dtb = set_dtb;
+ 	dec->stream_handle = rvid_alloc_stream_handle();
+ 	dec->screen = context->screen;
+@@ -827,10 +844,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 
+ 	bs_buf_size = width * height * 512 / (16 * 16);
+ 	for (i = 0; i < NUM_BUFFERS; ++i) {
+-		unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
++		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+ 		STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
+-		if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
+-					msg_fb_size, PIPE_USAGE_STAGING)) {
++		if (dec->stream_type == RUVD_CODEC_H264_PERF)
++			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
++		if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
++					msg_fb_it_size, PIPE_USAGE_STAGING)) {
+ 			RVID_ERR("Can't allocated message buffers.\n");
+ 			goto error;
+ 		}
+@@ -841,7 +860,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 			goto error;
+ 		}
+ 
+-		rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
++		rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
+ 		rvid_clear_buffer(context, &dec->bs_buffers[i]);
+ 	}
+ 
+@@ -852,11 +871,11 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 
+ 	rvid_clear_buffer(context, &dec->dpb);
+ 
+-	map_msg_fb_buf(dec);
++	map_msg_fb_it_buf(dec);
+ 	dec->msg->size = sizeof(*dec->msg);
+ 	dec->msg->msg_type = RUVD_MSG_CREATE;
+ 	dec->msg->stream_handle = dec->stream_handle;
+-	dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
++	dec->msg->body.create.stream_type = dec->stream_type;
+ 	dec->msg->body.create.width_in_samples = dec->base.width;
+ 	dec->msg->body.create.height_in_samples = dec->base.height;
+ 	dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
+@@ -870,7 +889,7 @@ error:
+ 	if (dec->cs) dec->ws->cs_destroy(dec->cs);
+ 
+ 	for (i = 0; i < NUM_BUFFERS; ++i) {
+-		rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
++		rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ 		rvid_destroy_buffer(&dec->bs_buffers[i]);
+ 	}
+ 
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
+index 7442865..5b6c65c 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.h
++++ b/src/gallium/drivers/radeon/radeon_uvd.h
+@@ -62,6 +62,7 @@
+ #define RUVD_CMD_DECODING_TARGET_BUFFER	0x00000002
+ #define RUVD_CMD_FEEDBACK_BUFFER	0x00000003
+ #define RUVD_CMD_BITSTREAM_BUFFER	0x00000100
++#define RUVD_CMD_ITSCALING_TABLE_BUFFER	0x00000204
+ 
+ /* UVD message types */
+ #define RUVD_MSG_CREATE		0
+@@ -73,6 +74,7 @@
+ #define RUVD_CODEC_VC1		0x00000001
+ #define RUVD_CODEC_MPEG2	0x00000003
+ #define RUVD_CODEC_MPEG4	0x00000004
++#define RUVD_CODEC_H264_PERF	0x00000007
+ 
+ /* UVD decode target buffer tiling mode */
+ #define RUVD_TILE_LINEAR	0x00000000
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0017-radeon-vce-make-firmware-check-compatible-with-new-f.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0017-radeon-vce-make-firmware-check-compatible-with-new-f.patch
new file mode 100644
index 00000000..956a722a
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0017-radeon-vce-make-firmware-check-compatible-with-new-f.patch
@@ -0,0 +1,29 @@
+From d2bfa3f43f6a8f905cf3115371bb82aa347315b7 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Fri, 6 Feb 2015 12:58:23 -0500
+Subject: [PATCH 17/29] radeon/vce: make firmware check compatible with new
+ firmware
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_vce.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
+index d1495a2..b8b35b3 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.c
++++ b/src/gallium/drivers/radeon/radeon_vce.c
+@@ -428,7 +428,7 @@ error:
+  */
+ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
+ {
+-	return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8));
++	return rscreen->info.vce_fw_version >= ((40 << 24) | (2 << 16) | (2 << 8));
+ }
+ 
+ /**
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0018-radeon-vce-adapt-new-firmware-interface-changes.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0018-radeon-vce-adapt-new-firmware-interface-changes.patch
new file mode 100644
index 00000000..915d6cab
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0018-radeon-vce-adapt-new-firmware-interface-changes.patch
@@ -0,0 +1,89 @@
+From b0624db0423f37e45038aa163f9699a4453b8945 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Wed, 11 Feb 2015 15:22:21 -0500
+Subject: [PATCH 18/29] radeon/vce: adapt new firmware interface changes
+
+v2: make this also compatible with original released firmware
+v3: rebase by Marek
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_vce.c        |  1 +
+ src/gallium/drivers/radeon/radeon_vce.h        |  1 +
+ src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 13 +++++++++++--
+ 3 files changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
+index b8b35b3..d5ae26d 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.c
++++ b/src/gallium/drivers/radeon/radeon_vce.c
+@@ -353,6 +353,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ 	if (!enc)
+ 		return NULL;
+ 
++	enc->fw_ver = rscreen->info.vce_fw_version;
+ 	if (rscreen->info.drm_major == 3)
+ 		enc->use_vm = true;
+ 	if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
+diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
+index 67ba333..7d37320 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.h
++++ b/src/gallium/drivers/radeon/radeon_vce.h
+@@ -100,6 +100,7 @@ struct rvce_encoder {
+ 	struct rvid_buffer		cpb;
+ 	struct pipe_h264_enc_picture_desc pic;
+ 	bool				use_vm;
++	unsigned			fw_ver;
+ 	bool				use_vui;
+ };
+ 
+diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+index 1e7f278..f1f4cce 100644
+--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
++++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+@@ -44,6 +44,8 @@
+ #include "radeon_video.h"
+ #include "radeon_vce.h"
+ 
++#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
++
+ static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
+ 
+ static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
+@@ -145,6 +147,10 @@ static void rate_control(struct rvce_encoder *enc)
+ 	RVCE_CS(0x00000000); // encBPicsDeltaQP
+ 	RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP
+ 	RVCE_CS(0x00000000); // encRateControlReInitDisable
++	if (enc->fw_ver > FW_40_2_2) {
++		RVCE_CS(0x00000000); // encLCVBRInitQPFlag
++		RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag
++	}
+ 	RVCE_END();
+ }
+ 
+@@ -314,7 +320,10 @@ static void encode(struct rvce_encoder *enc)
+ 	RVCE_END();
+ 
+ 	RVCE_BEGIN(0x03000001); // encode
+-	RVCE_CS(0x00000000); // insertHeaders
++	if ((enc->fw_ver > FW_40_2_2) && (!enc->pic.frame_num))
++		RVCE_CS(0x00000011); // insertHeaders
++	else
++		RVCE_CS(0x00000000); // insertHeaders
+ 	RVCE_CS(0x00000000); // pictureStructure
+ 	RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
+ 	RVCE_CS(0x00000000); // forceRefreshMap
+@@ -328,7 +337,7 @@ static void encode(struct rvce_encoder *enc)
+ 	RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+ 	RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+ 	RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+-	RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode
++	RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ 	RVCE_CS(0x00000000); // encInputPicTileConfig
+ 	RVCE_CS(enc->pic.picture_type); // encPicType
+ 	RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0019-radeon-video-add-4K-support-for-decode-encode-parame.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0019-radeon-video-add-4K-support-for-decode-encode-parame.patch
new file mode 100644
index 00000000..66670644
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0019-radeon-video-add-4K-support-for-decode-encode-parame.patch
@@ -0,0 +1,48 @@
+From 94b8d8d7ec638b16d62a1bddb060925ced9f50b7 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Mon, 9 Mar 2015 16:24:48 -0400
+Subject: [PATCH 19/29] radeon/video: add 4K support for decode/encode
+ parameters
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_video.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
+index 826e076..65949fb 100644
+--- a/src/gallium/drivers/radeon/radeon_video.c
++++ b/src/gallium/drivers/radeon/radeon_video.c
+@@ -214,9 +214,9 @@ int rvid_get_video_param(struct pipe_screen *screen,
+ 	        case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+         	        return 1;
+ 	        case PIPE_VIDEO_CAP_MAX_WIDTH:
+-        	        return 2048;
++			return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
+ 	        case PIPE_VIDEO_CAP_MAX_HEIGHT:
+-        	        return 1152;
++			return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
+ 	        case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+         	        return PIPE_FORMAT_NV12;
+ 	        case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+@@ -268,9 +268,9 @@ int rvid_get_video_param(struct pipe_screen *screen,
+ 	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ 		return 1;
+ 	case PIPE_VIDEO_CAP_MAX_WIDTH:
+-		return 2048;
++		return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
+ 	case PIPE_VIDEO_CAP_MAX_HEIGHT:
+-		return 1152;
++		return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
+ 	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ 		return PIPE_FORMAT_NV12;
+ 	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0020-radeon-uvd-recalculate-dbp-buffer-size.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0020-radeon-uvd-recalculate-dbp-buffer-size.patch
new file mode 100644
index 00000000..4dfbfcc9
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0020-radeon-uvd-recalculate-dbp-buffer-size.patch
@@ -0,0 +1,145 @@
+From f42b0415fa5e9a4525d378c1c9c71a3934fd2a3a Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Thu, 12 Mar 2015 16:13:44 -0400
+Subject: [PATCH 20/29] radeon/uvd: recalculate dbp buffer size
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_uvd.c | 81 ++++++++++++++++++++++++---------
+ 1 file changed, 59 insertions(+), 22 deletions(-)
+
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
+index b0bfb3b..38eb3d1 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.c
++++ b/src/gallium/drivers/radeon/radeon_uvd.c
+@@ -198,16 +198,16 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
+ }
+ 
+ /* calculate size of reference picture buffer */
+-static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
++static unsigned calc_dpb_size(struct ruvd_decoder *dec)
+ {
+ 	unsigned width_in_mb, height_in_mb, image_size, dpb_size;
+ 
+ 	// always align them to MB size for dpb calculation
+-	unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
+-	unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
++	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
++	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+ 
+ 	// always one more for currently decoded picture
+-	unsigned max_references = templ->max_references + 1;
++	unsigned max_references = dec->base.max_references + 1;
+ 
+ 	// aligned size of a single frame
+ 	image_size = width * height;
+@@ -218,20 +218,57 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
+ 	width_in_mb = width / VL_MACROBLOCK_WIDTH;
+ 	height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
+ 
+-	switch (u_reduce_video_profile(templ->profile)) {
+-	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+-		// the firmware seems to allways assume a minimum of ref frames
+-		max_references = MAX2(NUM_H264_REFS, max_references);
+-
+-		// reference picture buffer
+-		dpb_size = image_size * max_references;
+-
+-		// macroblock context buffer
+-		dpb_size += width_in_mb * height_in_mb * max_references * 192;
+-
+-		// IT surface buffer
+-		dpb_size += width_in_mb * height_in_mb * 32;
++	switch (u_reduce_video_profile(dec->base.profile)) {
++	case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
++		if (!dec->use_legacy) {
++			unsigned fs_in_mb = width_in_mb * height_in_mb;
++			unsigned alignment = 64, num_dpb_buffer;
++
++			if (dec->stream_type == RUVD_CODEC_H264_PERF)
++				alignment = 256;
++			switch(dec->base.level) {
++			case 30:
++				num_dpb_buffer = 8100 / fs_in_mb;
++				break;
++			case 31:
++				num_dpb_buffer = 18000 / fs_in_mb;
++				break;
++			case 32:
++				num_dpb_buffer = 20480 / fs_in_mb;
++				break;
++			case 41:
++				num_dpb_buffer = 32768 / fs_in_mb;
++				break;
++			case 42:
++				num_dpb_buffer = 34816 / fs_in_mb;
++				break;
++			case 50:
++				num_dpb_buffer = 110400 / fs_in_mb;
++				break;
++			case 51:
++				num_dpb_buffer = 184320 / fs_in_mb;
++				break;
++			default:
++				num_dpb_buffer = 184320 / fs_in_mb;
++				break;
++			}
++			num_dpb_buffer++;
++			max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
++			dpb_size = image_size * max_references;
++			dpb_size += max_references * align(width_in_mb * height_in_mb  * 192, alignment);
++			dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
++		} else {
++			// the firmware seems to always assume a minimum of ref frames
++			max_references = MAX2(NUM_H264_REFS, max_references);
++			// reference picture buffer
++			dpb_size = image_size * max_references;
++			// macroblock context buffer
++			dpb_size += width_in_mb * height_in_mb * max_references * 192;
++			// IT surface buffer
++			dpb_size += width_in_mb * height_in_mb * 32;
++		}
+ 		break;
++	}
+ 
+ 	case PIPE_VIDEO_FORMAT_VC1:
+ 		// the firmware seems to allways assume a minimum of ref frames
+@@ -303,10 +340,8 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
+ 		assert(0);
+ 		break;
+ 	}
+-	if (((dec->base.width * dec->base.height) >> 8) <= 1620)
+-		result.level = 30;
+-	else
+-		result.level = 41;
++
++	result.level = dec->base.level;
+ 
+ 	result.sps_info_flags = 0;
+ 	result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
+@@ -785,7 +820,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 					     ruvd_set_dtb set_dtb)
+ {
+ 	struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
+-	unsigned dpb_size = calc_dpb_size(templ);
++	unsigned dpb_size;
+ 	unsigned width = templ->width, height = templ->height;
+ 	unsigned bs_buf_size;
+ 	struct radeon_info info;
+@@ -864,6 +899,8 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ 		rvid_clear_buffer(context, &dec->bs_buffers[i]);
+ 	}
+ 
++	dpb_size = calc_dpb_size(dec);
++
+ 	if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
+ 		RVID_ERR("Can't allocated dpb.\n");
+ 		goto error;
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0021-radeon-uvd-make-30M-as-minimum-for-MPEG4-dpb-buffer-.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0021-radeon-uvd-make-30M-as-minimum-for-MPEG4-dpb-buffer-.patch
new file mode 100644
index 00000000..3873845d
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0021-radeon-uvd-make-30M-as-minimum-for-MPEG4-dpb-buffer-.patch
@@ -0,0 +1,32 @@
+From 2ff165c0f33193fd9ae6ae6f373c0416b14d8584 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Thu, 12 Mar 2015 16:24:57 -0400
+Subject: [PATCH 21/29] radeon/uvd: make 30M as minimum for MPEG4 dpb buffer
+ size
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_uvd.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
+index 38eb3d1..a712907 100644
+--- a/src/gallium/drivers/radeon/radeon_uvd.c
++++ b/src/gallium/drivers/radeon/radeon_uvd.c
+@@ -304,6 +304,8 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
+ 
+ 		// IT surface buffer
+ 		dpb_size += align(width_in_mb * height_in_mb * 32, 64);
++
++		dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
+ 		break;
+ 
+ 	default:
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0022-radeon-vce-implement-VCE-two-pipe-support.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0022-radeon-vce-implement-VCE-two-pipe-support.patch
new file mode 100644
index 00000000..98feea2e
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0022-radeon-vce-implement-VCE-two-pipe-support.patch
@@ -0,0 +1,96 @@
+From 411aabe76ae9621510d038b84e755fe583b54a7b Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Wed, 15 Apr 2015 12:36:32 -0400
+Subject: [PATCH 22/29] radeon/vce: implement VCE two pipe support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+v2: rebase by Marek
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeon/radeon_vce.c        |  5 +++++
+ src/gallium/drivers/radeon/radeon_vce.h        |  4 ++++
+ src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 17 +++++++++++++++++
+ 3 files changed, 26 insertions(+)
+
+diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
+index d5ae26d..79b0909 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.c
++++ b/src/gallium/drivers/radeon/radeon_vce.c
+@@ -358,6 +358,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ 		enc->use_vm = true;
+ 	if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
+ 		enc->use_vui = true;
++	if (rscreen->info.family >= CHIP_TONGA)
++		enc->use_2p = true;
+ 
+ 	enc->base = *templ;
+ 	enc->base.context = context;
+@@ -397,6 +399,9 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ 	cpb_size = cpb_size * align(tmp_surf->npix_y, 16);
+ 	cpb_size = cpb_size * 3 / 2;
+ 	cpb_size = cpb_size * enc->cpb_num;
++	if (enc->use_2p)
++		cpb_size +=  RVCE_MAX_AUX_BUFFER_NUM *
++			RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+ 	tmp_buf->destroy(tmp_buf);
+ 	if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+ 		RVID_ERR("Can't create CPB buffer.\n");
+diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
+index 7d37320..4d07204 100644
+--- a/src/gallium/drivers/radeon/radeon_vce.h
++++ b/src/gallium/drivers/radeon/radeon_vce.h
+@@ -43,6 +43,9 @@
+ #define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+ #define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
+ 
++#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
++#define RVCE_MAX_AUX_BUFFER_NUM 4
++
+ struct r600_common_screen;
+ 
+ /* driver dependent callback */
+@@ -102,6 +105,7 @@ struct rvce_encoder {
+ 	bool				use_vm;
+ 	unsigned			fw_ver;
+ 	bool				use_vui;
++	bool				use_2p;
+ };
+ 
+ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+index f1f4cce..970d572 100644
+--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
++++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+@@ -319,6 +319,23 @@ static void encode(struct rvce_encoder *enc)
+ 	RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+ 	RVCE_END();
+ 
++	if (enc->use_2p) {
++		unsigned aux_offset = enc->cpb.res->buf->size -
++			RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
++		RVCE_BEGIN(0x05000002); // auxiliary buffer
++		for (i = 0; i < 4; ++i) {
++			RVCE_CS(aux_offset);
++			aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
++		}
++		for (i = 0; i < 4; ++i)
++			RVCE_CS(0x00000000);
++		for (i = 0; i < 4; ++i)
++			RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
++		for (i = 0; i < 4; ++i)
++			RVCE_CS(0x00000000);
++		RVCE_END();
++	}
++
+ 	RVCE_BEGIN(0x03000001); // encode
+ 	if ((enc->fw_ver > FW_40_2_2) && (!enc->pic.frame_num))
+ 		RVCE_CS(0x00000011); // insertHeaders
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0023-radeonsi-add-new-VI-PCI-IDs.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0023-radeonsi-add-new-VI-PCI-IDs.patch
new file mode 100644
index 00000000..00762071
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0023-radeonsi-add-new-VI-PCI-IDs.patch
@@ -0,0 +1,41 @@
+From a8ff4160a2bd41c2d59681fb58e0422c60e26778 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Thu, 16 Apr 2015 22:59:41 +0200
+Subject: [PATCH 23/29] radeonsi: add new VI PCI IDs
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ include/pci_ids/radeonsi_pci_ids.h | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
+index 571e863..ca81f76 100644
+--- a/include/pci_ids/radeonsi_pci_ids.h
++++ b/include/pci_ids/radeonsi_pci_ids.h
+@@ -155,3 +155,23 @@ CHIPSET(0x67B8, HAWAII_67B8, HAWAII)
+ CHIPSET(0x67B9, HAWAII_67B9, HAWAII)
+ CHIPSET(0x67BA, HAWAII_67BA, HAWAII)
+ CHIPSET(0x67BE, HAWAII_67BE, HAWAII)
++
++CHIPSET(0x6900, ICELAND_, ICELAND)
++CHIPSET(0x6901, ICELAND_, ICELAND)
++CHIPSET(0x6902, ICELAND_, ICELAND)
++CHIPSET(0x6903, ICELAND_, ICELAND)
++CHIPSET(0x6907, ICELAND_, ICELAND)
++
++CHIPSET(0x6920, TONGA_, TONGA)
++CHIPSET(0x6921, TONGA_, TONGA)
++CHIPSET(0x6928, TONGA_, TONGA)
++CHIPSET(0x692B, TONGA_, TONGA)
++CHIPSET(0x692F, TONGA_, TONGA)
++CHIPSET(0x6938, TONGA_, TONGA)
++CHIPSET(0x6939, TONGA_, TONGA)
++
++CHIPSET(0x9870, CARRIZO_, CARRIZO)
++CHIPSET(0x9874, CARRIZO_, CARRIZO)
++CHIPSET(0x9875, CARRIZO_, CARRIZO)
++CHIPSET(0x9876, CARRIZO_, CARRIZO)
++CHIPSET(0x9877, CARRIZO_, CARRIZO)
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0024-gallium-util-get-h264-level-based-on-number-of-max-r.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0024-gallium-util-get-h264-level-based-on-number-of-max-r.patch
new file mode 100644
index 00000000..3a4a63d8
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0024-gallium-util-get-h264-level-based-on-number-of-max-r.patch
@@ -0,0 +1,76 @@
+From 4e9dbaaafed23588945a09617796baa91194dc57 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Thu, 12 Mar 2015 14:01:52 -0400
+Subject: [PATCH 24/29] gallium/util: get h264 level based on number of max
+ references and resolution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+v2: add commments for limitation of max references numbers,
+and what the caculation is based
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/auxiliary/util/u_video.h | 36 ++++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h
+index 45b2d6e..b4743d1 100644
+--- a/src/gallium/auxiliary/util/u_video.h
++++ b/src/gallium/auxiliary/util/u_video.h
+@@ -38,6 +38,7 @@ extern "C" {
+ /* u_reduce_video_profile() needs these */
+ #include "pipe/p_compiler.h"
+ #include "util/u_debug.h"
++#include "util/u_math.h"
+ 
+ static INLINE enum pipe_video_format
+ u_reduce_video_profile(enum pipe_video_profile profile)
+@@ -146,6 +147,41 @@ u_copy_swap422_packed(void *const *destination_data,
+    }
+ }
+ 
++static INLINE uint32_t
++u_get_h264_level(uint32_t width, uint32_t height, uint32_t *max_reference)
++{
++   uint32_t max_dpb_mbs;
++
++   width = align(width, 16);
++   height = align(height, 16);
++
++   /* Max references is used to calculate the number of DPB buffers in
++      the UVD driver, where the limit on max references is 16. Some clients,
++      such as the mpv application using VA-API, request more references
++      than that, so we have to clamp max references to 16 here. */
++   *max_reference = MIN2(*max_reference, 16);
++   max_dpb_mbs = (width / 16) * (height / 16) * *max_reference;
++
++   /* The calculation is based on "Decoded picture buffering" section
++      from http://en.wikipedia.org/wiki/H.264/MPEG-4_AVC */
++   if (max_dpb_mbs <= 8100)
++      return 30;
++   else if (max_dpb_mbs <= 18000)
++      return 31;
++   else if (max_dpb_mbs <= 20480)
++      return 32;
++   else if (max_dpb_mbs <= 32768)
++      return 41;
++   else if (max_dpb_mbs <= 34816)
++      return 42;
++   else if (max_dpb_mbs <= 110400)
++      return 50;
++   else if (max_dpb_mbs <= 184320)
++      return 51;
++   else
++      return 52;
++}
++
+ #ifdef __cplusplus
+ }
+ #endif
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0025-st-vdpau-add-h264-decoder-level-support.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0025-st-vdpau-add-h264-decoder-level-support.patch
new file mode 100644
index 00000000..729d11cc
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0025-st-vdpau-add-h264-decoder-level-support.patch
@@ -0,0 +1,34 @@
+From 2ccc95646ab8c004b0073cc6f01f9359ddc495a2 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Thu, 12 Mar 2015 14:09:49 -0400
+Subject: [PATCH 25/29] st/vdpau: add h264 decoder level support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/state_trackers/vdpau/decode.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
+index 767d311..0634ba7 100644
+--- a/src/gallium/state_trackers/vdpau/decode.c
++++ b/src/gallium/state_trackers/vdpau/decode.c
+@@ -118,6 +118,11 @@ vlVdpDecoderCreate(VdpDevice device,
+    templat.height = height;
+    templat.max_references = max_references;
+ 
++   if (u_reduce_video_profile(templat.profile) ==
++       PIPE_VIDEO_FORMAT_MPEG4_AVC)
++      templat.level = u_get_h264_level(templat.width, templat.height,
++                            &templat.max_references);
++
+    vldecoder->decoder = pipe->create_video_codec(pipe, &templat);
+ 
+    if (!vldecoder->decoder) {
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0026-st-omx-dec-separate-create_video_codec-to-different-.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0026-st-omx-dec-separate-create_video_codec-to-different-.patch
new file mode 100644
index 00000000..6bd9c12e
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0026-st-omx-dec-separate-create_video_codec-to-different-.patch
@@ -0,0 +1,123 @@
+From 2261e94a5b6a67ec9142cd3c71d824fec35b70c2 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Fri, 13 Mar 2015 12:25:42 -0400
+Subject: [PATCH 26/29] st/omx/dec: separate create_video_codec to different
+ codecs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+v2: get frame size from port info
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/state_trackers/omx/vid_dec.c        | 18 ------------------
+ src/gallium/state_trackers/omx/vid_dec.h        |  1 +
+ src/gallium/state_trackers/omx/vid_dec_h264.c   | 15 +++++++++++++++
+ src/gallium/state_trackers/omx/vid_dec_mpeg12.c | 14 ++++++++++++++
+ 4 files changed, 30 insertions(+), 18 deletions(-)
+
+diff --git a/src/gallium/state_trackers/omx/vid_dec.c b/src/gallium/state_trackers/omx/vid_dec.c
+index 13f4f55..9e7e7ba 100644
+--- a/src/gallium/state_trackers/omx/vid_dec.c
++++ b/src/gallium/state_trackers/omx/vid_dec.c
+@@ -44,8 +44,6 @@
+ #include <bellagio/omxcore.h>
+ #endif
+ 
+-#include <bellagio/omx_base_video_port.h>
+-
+ #include "pipe/p_screen.h"
+ #include "pipe/p_video_codec.h"
+ #include "util/u_memory.h"
+@@ -364,22 +362,6 @@ static OMX_ERRORTYPE vid_dec_MessageHandler(OMX_COMPONENTTYPE* comp, internalReq
+ 
+    if (msg->messageType == OMX_CommandStateSet) {
+       if ((msg->messageParam == OMX_StateIdle ) && (priv->state == OMX_StateLoaded)) {
+-
+-         struct pipe_video_codec templat = {};
+-         omx_base_video_PortType *port;
+-
+-         port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+-
+-         templat.profile = priv->profile;
+-         templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
+-         templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+-         templat.width = port->sPortParam.format.video.nFrameWidth;
+-         templat.height = port->sPortParam.format.video.nFrameHeight;
+-         templat.max_references = 2;
+-         templat.expect_chunked_decode = true;
+-
+-         priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
+-
+          if (priv->profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN)
+             vid_dec_mpeg12_Init(priv);
+          else if (priv->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH)
+diff --git a/src/gallium/state_trackers/omx/vid_dec.h b/src/gallium/state_trackers/omx/vid_dec.h
+index 9acf872..1c51f9c 100644
+--- a/src/gallium/state_trackers/omx/vid_dec.h
++++ b/src/gallium/state_trackers/omx/vid_dec.h
+@@ -44,6 +44,7 @@
+ 
+ #include <bellagio/st_static_component_loader.h>
+ #include <bellagio/omx_base_filter.h>
++#include <bellagio/omx_base_video_port.h>
+ 
+ #include "pipe/p_video_state.h"
+ #include "state_tracker/drm_driver.h"
+diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
+index e01e873..7c90dee 100644
+--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
++++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
+@@ -105,6 +105,21 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
+ 
+    priv->picture.h264.num_ref_frames = priv->picture.h264.pps->sps->max_num_ref_frames;
+ 
++   if (!priv->codec) {
++      struct pipe_video_codec templat = {};
++      omx_base_video_PortType *port;
++
++      port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
++      templat.profile = priv->profile;
++      templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
++      templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
++      templat.max_references = 2;
++      templat.expect_chunked_decode = true;
++      templat.width = port->sPortParam.format.video.nFrameWidth;
++      templat.height = port->sPortParam.format.video.nFrameHeight;
++
++      priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
++   }
+    priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base);
+    priv->frame_started = true;
+ }
+diff --git a/src/gallium/state_trackers/omx/vid_dec_mpeg12.c b/src/gallium/state_trackers/omx/vid_dec_mpeg12.c
+index de4c69a..bef83ec 100644
+--- a/src/gallium/state_trackers/omx/vid_dec_mpeg12.c
++++ b/src/gallium/state_trackers/omx/vid_dec_mpeg12.c
+@@ -65,6 +65,20 @@ static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv)
+ 
+ void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv)
+ {
++   struct pipe_video_codec templat = {};
++   omx_base_video_PortType *port;
++
++   port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
++   templat.profile = priv->profile;
++   templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
++   templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
++   templat.max_references = 2;
++   templat.expect_chunked_decode = true;
++   templat.width = port->sPortParam.format.video.nFrameWidth;
++   templat.height = port->sPortParam.format.video.nFrameHeight;
++
++   priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
++
+    priv->picture.base.profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN;
+    priv->picture.mpeg12.intra_matrix = default_intra_matrix;
+    priv->picture.mpeg12.non_intra_matrix = default_non_intra_matrix;
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0027-vl-add-level-idc-in-sps.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0027-vl-add-level-idc-in-sps.patch
new file mode 100644
index 00000000..e084140c
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0027-vl-add-level-idc-in-sps.patch
@@ -0,0 +1,30 @@
+From 6804e81546f265be09aa3d496ec6ac2fb4a1665e Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Mon, 16 Mar 2015 15:06:30 -0400
+Subject: [PATCH 27/29] vl: add level idc in sps
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/include/pipe/p_video_state.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
+index 6621dbd..3713cd9 100644
+--- a/src/gallium/include/pipe/p_video_state.h
++++ b/src/gallium/include/pipe/p_video_state.h
+@@ -271,6 +271,7 @@ struct pipe_vc1_picture_desc
+ 
+ struct pipe_h264_sps
+ {
++   uint8_t  level_idc;
+    uint8_t  chroma_format_idc;
+    uint8_t  separate_colour_plane_flag;
+    uint8_t  bit_depth_luma_minus8;
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0028-st-omx-dec-add-h264-decoder-level-support.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0028-st-omx-dec-add-h264-decoder-level-support.patch
new file mode 100644
index 00000000..0b59004a
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0028-st-omx-dec-add-h264-decoder-level-support.patch
@@ -0,0 +1,72 @@
+From fdf7e12f010fd7650b767f726cb27e62321e6573 Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Fri, 13 Mar 2015 12:39:26 -0400
+Subject: [PATCH 28/29] st/omx/dec: add h264 decoder level support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+v2: use sps level idc as level to driver
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/state_trackers/omx/vid_dec_h264.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
+index 7c90dee..18d8803 100644
+--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
++++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
+@@ -33,6 +33,7 @@
+ 
+ #include "pipe/p_video_codec.h"
+ #include "util/u_memory.h"
++#include "util/u_video.h"
+ #include "vl/vl_rbsp.h"
+ 
+ #include "entrypoint.h"
+@@ -113,10 +114,11 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
+       templat.profile = priv->profile;
+       templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
+       templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+-      templat.max_references = 2;
++      templat.max_references = priv->picture.h264.num_ref_frames;
+       templat.expect_chunked_decode = true;
+       templat.width = port->sPortParam.format.video.nFrameWidth;
+       templat.height = port->sPortParam.format.video.nFrameHeight;
++      templat.level = priv->picture.h264.pps->sps->level_idc;
+ 
+       priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
+    }
+@@ -239,7 +241,7 @@ static struct pipe_h264_sps *seq_parameter_set_id(vid_dec_PrivateType *priv, str
+ static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+ {
+    struct pipe_h264_sps *sps;
+-   unsigned profile_idc;
++   unsigned profile_idc, level_idc;
+    unsigned i;
+ 
+    /* Sequence parameter set */
+@@ -267,7 +269,7 @@ static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+    vl_rbsp_u(rbsp, 2);
+ 
+    /* level_idc */
+-   vl_rbsp_u(rbsp, 8);
++   level_idc = vl_rbsp_u(rbsp, 8);
+ 
+    sps = seq_parameter_set_id(priv, rbsp);
+    if (!sps)
+@@ -277,6 +279,8 @@ static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+    memset(sps->ScalingList4x4, 16, sizeof(sps->ScalingList4x4));
+    memset(sps->ScalingList8x8, 16, sizeof(sps->ScalingList8x8));
+ 
++   sps->level_idc = level_idc;
++
+    if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 ||
+        profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 ||
+        profile_idc == 128 || profile_idc == 138) {
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0029-st-va-add-h264-decoder-level-support.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0029-st-va-add-h264-decoder-level-support.patch
new file mode 100644
index 00000000..a06168a3
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0029-st-va-add-h264-decoder-level-support.patch
@@ -0,0 +1,34 @@
+From b1cb817f8f37d16aba15e610ca3a7df7aed23bcf Mon Sep 17 00:00:00 2001
+From: Leo Liu <leo.liu@amd.com>
+Date: Thu, 12 Mar 2015 14:29:21 -0400
+Subject: [PATCH 29/29] st/va: add h264 decoder level support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Leo Liu <leo.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/state_trackers/va/context.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
+index a7a55f9..0a733b3 100644
+--- a/src/gallium/state_trackers/va/context.c
++++ b/src/gallium/state_trackers/va/context.c
+@@ -167,6 +167,11 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width,
+    templat.max_references = num_render_targets;
+    templat.expect_chunked_decode = true;
+ 
++   if (u_reduce_video_profile(templat.profile) ==
++       PIPE_VIDEO_FORMAT_MPEG4_AVC)
++      templat.level = u_get_h264_level(templat.width, templat.height,
++                            &templat.max_references);
++
+    context->decoder = drv->pipe->create_video_codec(drv->pipe, &templat);
+    if (!context->decoder) {
+       FREE(context);
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0030-radeonsi-properly-set-the-raster_config-for-KV.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0030-radeonsi-properly-set-the-raster_config-for-KV.patch
new file mode 100644
index 00000000..9ed40181
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0030-radeonsi-properly-set-the-raster_config-for-KV.patch
@@ -0,0 +1,53 @@
+From 7f720fc229da2b75b60ddc9824f19ebd1acc42df Mon Sep 17 00:00:00 2001
+From: Arindam Nath <arindam.nath@amd.com>
+Date: Thu, 11 Jun 2015 14:07:38 +0530
+Subject: [PATCH 1/2] radeonsi: properly set the raster_config for KV
+
+This enables the second RB on asics that support it which
+should boost performance.
+
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: mesa-stable@lists.freedesktop.org
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeonsi/si_state.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index b35fbd5..b3e77ec 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -3040,6 +3040,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
+ 
+ void si_init_config(struct si_context *sctx)
+ {
++	unsigned num_rb = sctx->screen->b.info.r600_num_backends;
+ 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+ 
+ 	if (pm4 == NULL)
+@@ -3097,14 +3098,17 @@ void si_init_config(struct si_context *sctx)
+ 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+ 			break;
+ 		case CHIP_KAVERI:
+-			/* XXX todo */
++			if (num_rb > 1)
++				si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002);
++			else
++				si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
++			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
++			break;
+ 		case CHIP_KABINI:
+-			/* XXX todo */
+ 		case CHIP_MULLINS:
+-			/* XXX todo */
+ 		default:
+-			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0);
+-			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
++			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
++			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+ 			break;
+ 		}
+ 	} else {
+-- 
+1.9.1
+
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0031-radeonsi-properly-handler-raster_config-setup-on-CZ.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0031-radeonsi-properly-handler-raster_config-setup-on-CZ.patch
new file mode 100644
index 00000000..0b234494
--- /dev/null
+++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0031-radeonsi-properly-handler-raster_config-setup-on-CZ.patch
@@ -0,0 +1,32 @@
+From 111520d282ddabf9a26c640c67ac9e88341a3423 Mon Sep 17 00:00:00 2001
+From: Arindam Nath <arindam.nath@amd.com>
+Date: Thu, 11 Jun 2015 14:09:25 +0530
+Subject: [PATCH 2/2] radeonsi: properly handle raster_config setup on CZ
+
+Need to take into account the number of RBs.
+
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+---
+ src/gallium/drivers/radeonsi/si_state.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
+index b3e77ec..f3c90e2 100644
+--- a/src/gallium/drivers/radeonsi/si_state.c
++++ b/src/gallium/drivers/radeonsi/si_state.c
+@@ -3093,10 +3093,10 @@ void si_init_config(struct si_context *sctx)
+ 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002a);
+ 			break;
+ 		case CHIP_ICELAND:
+-		case CHIP_CARRIZO:
+ 			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002);
+ 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+ 			break;
++		case CHIP_CARRIZO:
+ 		case CHIP_KAVERI:
+ 			if (num_rb > 1)
+ 				si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002);
+-- 
+1.9.1
+