From 9157dcbca0b3001d63ef0755e707f7bd4e2311a6 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Tue, 12 Feb 2019 19:50:33 +0100 Subject: [PATCH] shader: use varying outputs directly for TF when possible Some varying outputs can be directly used for transform feedback, so don't emit an additional varying in these cases. This should save a move instruction and also reduce the possibility of hitting the output varying limit. Signed-off-by: Gert Wollny Reviewed-By: Gurchetan Singh --- src/gallium/include/pipe/p_state.h | 1 + src/vrend_decode.c | 8 ++++++++ src/vrend_shader.c | 33 +++++++++++++++++++++--------- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 870b0cc..f0a7b55 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -208,6 +208,7 @@ struct pipe_stream_output_info unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */ unsigned dst_offset:16; /**< offset into the buffer in dwords */ unsigned stream:2; + unsigned need_temp:1; } output[PIPE_MAX_SO_OUTPUTS]; }; diff --git a/src/vrend_decode.c b/src/vrend_decode.c index e830d07..bcef26d 100644 --- a/src/vrend_decode.c +++ b/src/vrend_decode.c @@ -109,6 +109,14 @@ static int vrend_decode_create_shader(struct vrend_decode_ctx *ctx, so_info.output[i].dst_offset = (tmp >> 16) & 0xffff; tmp = get_buf_entry(ctx, VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(i)); so_info.output[i].stream = (tmp & 0x3); + so_info.output[i].need_temp = so_info.output[i].num_components < 4; + } + + for (i = 0; i < so_info.num_outputs - 1; i++) { + for (unsigned j = i + 1; j < so_info.num_outputs; j++) { + so_info.output[j].need_temp |= + (so_info.output[i].register_index == so_info.output[j].register_index); + } } } shader_offset += 4 + (2 * num_so_outputs); diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 1e6a356..a4193ff 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -1705,8 +1705,6 @@ static void 
emit_so_movs(struct dump_ctx *ctx) return; } - const char *stage_prefix = get_stage_output_name_prefix(ctx->prog_type); - for (i = 0; i < ctx->so->num_outputs; i++) { const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs, ctx->so->output[i].register_index); if (ctx->so->output[i].start_component != 0) { @@ -1735,7 +1733,7 @@ static void emit_so_movs(struct dump_ctx *ctx) ctx->has_clipvertex_so = true; } else { char out_var[255]; - get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, writemask); + get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, ""); ctx->so_names[i] = strdup(out_var); } } else { @@ -1765,8 +1763,14 @@ static void emit_so_movs(struct dump_ctx *ctx) } else { if (ctx->write_so_outputs[i]) { char out_var[255]; - get_so_name(ctx, false, output, ctx->so->output[i].register_index, out_var, writemask); - emit_buff(ctx, "tfout%d = %s(%s%s);\n", i, outtype, out_var, writemask); + if (ctx->so->output[i].need_temp || ctx->prog_type == TGSI_PROCESSOR_GEOMETRY + || output->glsl_predefined_no_emit) { + get_so_name(ctx, false, output, ctx->so->output[i].register_index, out_var, writemask); + emit_buff(ctx, "tfout%d = %s(%s);\n", i, outtype, out_var); + } else { + get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, writemask); + ctx->so_names[i] = strdup(out_var); + } } } } @@ -5171,12 +5175,21 @@ static void emit_ios_streamout(struct dump_ctx *ctx) snprintf(outtype, 6, "float"); else snprintf(outtype, 6, "vec%d", ctx->so->output[i].num_components); - if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL) - emit_hdrf(ctx, "out %s tfout%d[];\n", outtype, i); - else if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) + + if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) emit_hdrf(ctx, "layout (stream=%d) out %s tfout%d;\n", ctx->so->output[i].stream, outtype, i); - else - emit_hdrf(ctx, "out %s tfout%d;\n", outtype, i); + 
else { + const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs, + ctx->so->output[i].register_index); + if (ctx->so->output[i].need_temp || output->name == TGSI_SEMANTIC_CLIPDIST || + output->glsl_predefined_no_emit) { + + if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL) + emit_hdrf(ctx, "out %s tfout%d[];\n", outtype, i); + else + emit_hdrf(ctx, "out %s tfout%d;\n", outtype, i); + } + } } } }