diff options
Diffstat (limited to 'meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch')
-rw-r--r-- | meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch | 410 |
1 file changed, 410 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch new file mode 100644 index 00000000..60b858b2 --- /dev/null +++ b/meta-amdfalconx86/recipes-graphics/mesa/mesa/0012-radeonsi-add-VI-hardware-support.patch @@ -0,0 +1,410 @@ +From 5de6f83f9ce743f5a2ae600bb5cb2eba48a54d3f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com> +Date: Thu, 16 Apr 2015 20:44:54 +0200 +Subject: [PATCH 12/29] radeonsi: add VI hardware support + +Signed-off-by: Arindam Nath <arindam.nath@amd.com> +--- + src/gallium/drivers/radeon/r600_pipe_common.c | 6 ++++ + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + + src/gallium/drivers/radeonsi/si_descriptors.c | 15 +++++++-- + src/gallium/drivers/radeonsi/si_pipe.c | 5 ++- + src/gallium/drivers/radeonsi/si_shader.c | 24 +++++++++++-- + src/gallium/drivers/radeonsi/si_state.c | 45 +++++++++++++++++++++---- + src/gallium/drivers/radeonsi/si_state_draw.c | 41 ++++++++++++++++------ + src/gallium/drivers/radeonsi/si_state_shaders.c | 8 ++++- + 8 files changed, 121 insertions(+), 24 deletions(-) + +diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c +index 3b26dea..80a32ca 100644 +--- a/src/gallium/drivers/radeon/r600_pipe_common.c ++++ b/src/gallium/drivers/radeon/r600_pipe_common.c +@@ -379,6 +379,9 @@ static const char* r600_get_name(struct pipe_screen* pscreen) + case CHIP_KABINI: return "AMD KABINI"; + case CHIP_HAWAII: return "AMD HAWAII"; + case CHIP_MULLINS: return "AMD MULLINS"; ++ case CHIP_TONGA: return "AMD TONGA"; ++ case CHIP_ICELAND: return "AMD ICELAND"; ++ case CHIP_CARRIZO: return "AMD CARRIZO"; + default: return "AMD unknown"; + } + } +@@ -496,6 +499,9 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) + #else + return "kabini"; + #endif ++ case CHIP_TONGA: return "tonga"; ++ case 
CHIP_ICELAND: return "iceland"; ++ case CHIP_CARRIZO: return "carrizo"; + default: return ""; + } + } +diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h +index bdb4541..8944059 100644 +--- a/src/gallium/drivers/radeon/r600_pipe_common.h ++++ b/src/gallium/drivers/radeon/r600_pipe_common.h +@@ -232,6 +232,7 @@ struct r600_surface { + unsigned cb_color_pitch; /* EG and later */ + unsigned cb_color_slice; /* EG and later */ + unsigned cb_color_attrib; /* EG and later */ ++ unsigned cb_dcc_control; /* VI and later */ + unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ + unsigned cb_color_fmask_slice; /* EG and later */ + unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ +diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c +index bbfd36d..88a507d 100644 +--- a/src/gallium/drivers/radeonsi/si_descriptors.c ++++ b/src/gallium/drivers/radeonsi/si_descriptors.c +@@ -675,7 +675,8 @@ void si_update_vertex_buffers(struct si_context *sctx) + desc[0] = va & 0xFFFFFFFF; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(vb->stride); +- if (vb->stride) ++ ++ if (sctx->b.chip_class <= CIK && vb->stride) + /* Round up by rounding down and adding 1 */ + desc[2] = (vb->buffer->width0 - offset - + sctx->vertex_elements->format_size[i]) / +@@ -839,6 +840,9 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, + break; + } + ++ if (sctx->b.chip_class >= VI && stride) ++ num_records *= stride; ++ + /* Set the descriptor. */ + uint32_t *desc = buffers->desc_data[slot]; + desc[0] = va; +@@ -929,7 +933,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx, + struct pipe_resource *buffer = targets[i]->buffer; + uint64_t va = r600_resource(buffer)->gpu_address; + +- /* Set the descriptor. */ ++ /* Set the descriptor. 
++ * ++ * On VI, the format must be non-INVALID, otherwise ++ * the buffer will be considered not bound and store ++ * instructions will be no-ops. */ + uint32_t *desc = buffers->desc_data[bufidx]; + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); +@@ -937,7 +945,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | +- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); ++ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | ++ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + /* Set the resource. */ + pipe_resource_reference(&buffers->buffers[bufidx], +diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c +index 91b4d6f..0bb7a35 100644 +--- a/src/gallium/drivers/radeonsi/si_pipe.c ++++ b/src/gallium/drivers/radeonsi/si_pipe.c +@@ -130,6 +130,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * + switch (sctx->b.chip_class) { + case SI: + case CIK: ++ case VI: + si_init_state_functions(sctx); + si_init_shader_functions(sctx); + si_init_config(sctx); +@@ -181,7 +182,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * + r600_target = radeon_llvm_get_r600_target(triple); + sctx->tm = LLVMCreateTargetMachine(r600_target, triple, + r600_get_llvm_processor_name(sscreen->b.family), +- "+DumpCode,+vgpr-spilling", ++ sctx->b.chip_class >= VI ? 
++ "+DumpCode" : ++ "+DumpCode,+vgpr-spilling", + LLVMCodeGenLevelDefault, + LLVMRelocDefault, + LLVMCodeModelDefault); +diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c +index 89f02ab..9b9a9aa 100644 +--- a/src/gallium/drivers/radeonsi/si_shader.c ++++ b/src/gallium/drivers/radeonsi/si_shader.c +@@ -2060,6 +2060,7 @@ static void txq_fetch_args( + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + const struct tgsi_full_instruction *inst = emit_data->inst; + struct gallivm_state *gallivm = bld_base->base.gallivm; ++ LLVMBuilderRef builder = gallivm->builder; + unsigned target = inst->Texture.Texture; + + if (target == TGSI_TEXTURE_BUFFER) { +@@ -2067,10 +2068,27 @@ static void txq_fetch_args( + LLVMTypeRef v8i32 = LLVMVectorType(i32, 8); + + /* Read the size from the buffer descriptor directly. */ +- LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index]; +- size = LLVMBuildBitCast(gallivm->builder, size, v8i32, ""); +- size = LLVMBuildExtractElement(gallivm->builder, size, ++ LLVMValueRef res = si_shader_ctx->resources[inst->Src[1].Register.Index]; ++ res = LLVMBuildBitCast(builder, res, v8i32, ""); ++ LLVMValueRef size = LLVMBuildExtractElement(builder, res, + lp_build_const_int32(gallivm, 6), ""); ++ ++ if (si_shader_ctx->screen->b.chip_class >= VI) { ++ /* On VI, the descriptor contains the size in bytes, ++ * but TXQ must return the size in elements. ++ * The stride is always non-zero for resources using TXQ. 
++ */ ++ LLVMValueRef stride = ++ LLVMBuildExtractElement(builder, res, ++ lp_build_const_int32(gallivm, 5), ""); ++ stride = LLVMBuildLShr(builder, stride, ++ lp_build_const_int32(gallivm, 16), ""); ++ stride = LLVMBuildAnd(builder, stride, ++ lp_build_const_int32(gallivm, 0x3FFF), ""); ++ ++ size = LLVMBuildUDiv(builder, size, stride, ""); ++ } ++ + emit_data->args[0] = size; + return; + } +diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c +index 6a0b093..b35fbd5 100644 +--- a/src/gallium/drivers/radeonsi/si_state.c ++++ b/src/gallium/drivers/radeonsi/si_state.c +@@ -46,7 +46,7 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, + + uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) + { +- if (sscreen->b.chip_class == CIK && ++ if (sscreen->b.chip_class >= CIK && + sscreen->b.info.cik_macrotile_mode_array_valid) { + unsigned index, tileb; + +@@ -1815,6 +1815,9 @@ static void si_initialize_color_surface(struct si_context *sctx, + surf->cb_color_info = color_info; + surf->cb_color_attrib = color_attrib; + ++ if (sctx->b.chip_class >= VI) ++ surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1); ++ + if (rtex->fmask.size) { + surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; + surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); +@@ -1960,6 +1963,10 @@ static void si_init_depth_surface(struct si_context *sctx, + db_htile_surface = 0; + } + ++ /* Bug workaround. 
*/ ++ if (sctx->b.chip_class >= VI) ++ s_info |= S_028044_TILE_STENCIL_DISABLE(1); ++ + assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); + + surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | +@@ -2051,7 +2058,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, + si_update_fb_rs_state(sctx); + si_update_fb_blend_state(sctx); + +- sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; ++ sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3; + sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4; + sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ + sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ +@@ -2130,20 +2137,24 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom + RADEON_PRIO_COLOR_META); + } + +- r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); ++ r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, ++ sctx->b.chip_class >= VI ? 
14 : 13); + radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ + radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ + radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ + radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ + radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ + radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ +- radeon_emit(cs, 0); /* R_028C78 unused */ ++ radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ + radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ + radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ + radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ + radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ + radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ + radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ ++ ++ if (sctx->b.chip_class >= VI) ++ radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ + } + /* set CB_COLOR1_INFO for possible dual-src blending */ + if (i == 1 && state->cbufs[0]) { +@@ -2286,7 +2297,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx + + /* Buffer resource. 
*/ + if (texture->target == PIPE_BUFFER) { +- unsigned stride; ++ unsigned stride, num_records; + + desc = util_format_description(state->format); + first_non_void = util_format_get_first_non_void_channel(state->format); +@@ -2295,10 +2306,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx + format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); + num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); + ++ num_records = state->u.buf.last_element + 1 - state->u.buf.first_element; ++ num_records = MIN2(num_records, texture->width0 / stride); ++ ++ if (sctx->b.chip_class >= VI) ++ num_records *= stride; ++ + view->state[4] = va; + view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(stride); +- view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element; ++ view->state[6] = num_records; + view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | + S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | + S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | +@@ -3070,6 +3087,15 @@ void si_init_config(struct si_context *sctx) + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a); + si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e); + break; ++ case CHIP_TONGA: ++ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); ++ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002a); ++ break; ++ case CHIP_ICELAND: ++ case CHIP_CARRIZO: ++ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002); ++ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); ++ break; + case CHIP_KAVERI: + /* XXX todo */ + case CHIP_KABINI: +@@ -3162,5 +3188,12 @@ void si_init_config(struct si_context *sctx) + si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); + } + ++ if (sctx->b.chip_class >= VI) { ++ si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, ++ 
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1)); ++ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); ++ si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); ++ } ++ + sctx->init_config = pm4; + } +diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c +index 2e77d85..2d38b20 100644 +--- a/src/gallium/drivers/radeonsi/si_state_draw.c ++++ b/src/gallium/drivers/radeonsi/si_state_draw.c +@@ -146,7 +146,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, + return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | + S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | + S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | +- S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0); ++ S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) | ++ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0); + } + + static void si_emit_scratch_reloc(struct si_context *sctx) +@@ -275,12 +276,24 @@ static void si_emit_draw_packets(struct si_context *sctx, + if (info->indexed) { + radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); + +- if (ib->index_size == 4) { +- radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ? +- V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); +- } else { +- radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ? +- V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); ++ /* index type */ ++ switch (ib->index_size) { ++ case 1: ++ radeon_emit(cs, V_028A7C_VGT_INDEX_8); ++ break; ++ case 2: ++ radeon_emit(cs, V_028A7C_VGT_INDEX_16 | ++ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ? ++ V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); ++ break; ++ case 4: ++ radeon_emit(cs, V_028A7C_VGT_INDEX_32 | ++ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ? 
++ V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); ++ break; ++ default: ++ assert(!"unreachable"); ++ return; + } + } + +@@ -406,9 +419,14 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato + + if (sctx->flags & SI_CONTEXT_INV_TC_L1) + cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); +- if (sctx->flags & SI_CONTEXT_INV_TC_L2) ++ if (sctx->flags & SI_CONTEXT_INV_TC_L2) { + cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); + ++ /* TODO: this might not be needed. */ ++ if (sctx->chip_class >= VI) ++ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1); ++ } ++ + if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) { + cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | +@@ -550,7 +568,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) + ib.offset = sctx->index_buffer.offset; + + /* Translate or upload, if needed. */ +- if (ib.index_size == 1) { ++ /* 8-bit indices are supported on VI. */ ++ if (sctx->b.chip_class <= CIK && ib.index_size == 1) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset, start, count, start_offset; + void *ptr; +@@ -585,6 +604,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) + } + } + ++ /* TODO: VI should read index buffers through TC, so this shouldn't be ++ * needed on VI. */ + if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) { + sctx->b.flags |= SI_CONTEXT_INV_TC_L2; + r600_resource(ib.buffer)->TC_L2_dirty = false; +@@ -618,7 +639,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) + + /* Workaround for a VGT hang when streamout is enabled. + * It must be done after drawing. 
*/ +- if (sctx->b.family == CHIP_HAWAII && ++ if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) && + (sctx->b.streamout.streamout_enabled || + sctx->b.streamout.prims_gen_query_enabled)) { + sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC; +diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c +index 1bbc6b3..09ca792 100644 +--- a/src/gallium/drivers/radeonsi/si_state_shaders.c ++++ b/src/gallium/drivers/radeonsi/si_state_shaders.c +@@ -700,7 +700,7 @@ bcolor: + static void si_init_gs_rings(struct si_context *sctx) + { + unsigned esgs_ring_size = 128 * 1024; +- unsigned gsvs_ring_size = 64 * 1024 * 1024; ++ unsigned gsvs_ring_size = 60 * 1024 * 1024; + + assert(!sctx->gs_rings); + sctx->gs_rings = CALLOC_STRUCT(si_pm4_state); +@@ -712,6 +712,12 @@ static void si_init_gs_rings(struct si_context *sctx) + PIPE_USAGE_DEFAULT, gsvs_ring_size); + + if (sctx->b.chip_class >= CIK) { ++ if (sctx->b.chip_class >= VI) { ++ /* The maximum sizes are 63.999 MB on VI, because ++ * the register fields only have 18 bits. */ ++ assert(esgs_ring_size / 256 < (1 << 18)); ++ assert(gsvs_ring_size / 256 < (1 << 18)); ++ } + si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE, + esgs_ring_size / 256); + si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE, +-- +1.9.1 + |