shader: use varying outputs directly for TF when possible

Some varying outputs can be used directly for transform feedback, so don't
emit an additional varying in these cases. This should save a move
instruction and also reduce the possibility of hitting the output varying
limit.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-By: Gurchetan Singh <gurchetansingh@chromium.org>
macos/master
Gert Wollny 6 years ago committed by Gert Wollny
parent 3c195a9a36
commit 9157dcbca0
  1. 1
      src/gallium/include/pipe/p_state.h
  2. 8
      src/vrend_decode.c
  3. 25
      src/vrend_shader.c

@ -208,6 +208,7 @@ struct pipe_stream_output_info
unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */ unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */
unsigned dst_offset:16; /**< offset into the buffer in dwords */ unsigned dst_offset:16; /**< offset into the buffer in dwords */
unsigned stream:2; unsigned stream:2;
unsigned need_temp:1;
} output[PIPE_MAX_SO_OUTPUTS]; } output[PIPE_MAX_SO_OUTPUTS];
}; };

@ -109,6 +109,14 @@ static int vrend_decode_create_shader(struct vrend_decode_ctx *ctx,
so_info.output[i].dst_offset = (tmp >> 16) & 0xffff; so_info.output[i].dst_offset = (tmp >> 16) & 0xffff;
tmp = get_buf_entry(ctx, VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(i)); tmp = get_buf_entry(ctx, VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(i));
so_info.output[i].stream = (tmp & 0x3); so_info.output[i].stream = (tmp & 0x3);
so_info.output[i].need_temp = so_info.output[i].num_components < 4;
}
for (i = 0; i < so_info.num_outputs - 1; i++) {
for (unsigned j = i + 1; j < so_info.num_outputs; j++) {
so_info.output[j].need_temp |=
(so_info.output[i].register_index == so_info.output[j].register_index);
}
} }
} }
shader_offset += 4 + (2 * num_so_outputs); shader_offset += 4 + (2 * num_so_outputs);

@ -1705,8 +1705,6 @@ static void emit_so_movs(struct dump_ctx *ctx)
return; return;
} }
const char *stage_prefix = get_stage_output_name_prefix(ctx->prog_type);
for (i = 0; i < ctx->so->num_outputs; i++) { for (i = 0; i < ctx->so->num_outputs; i++) {
const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs, ctx->so->output[i].register_index); const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs, ctx->so->output[i].register_index);
if (ctx->so->output[i].start_component != 0) { if (ctx->so->output[i].start_component != 0) {
@ -1735,7 +1733,7 @@ static void emit_so_movs(struct dump_ctx *ctx)
ctx->has_clipvertex_so = true; ctx->has_clipvertex_so = true;
} else { } else {
char out_var[255]; char out_var[255];
get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, writemask); get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, "");
ctx->so_names[i] = strdup(out_var); ctx->so_names[i] = strdup(out_var);
} }
} else { } else {
@ -1765,8 +1763,14 @@ static void emit_so_movs(struct dump_ctx *ctx)
} else { } else {
if (ctx->write_so_outputs[i]) { if (ctx->write_so_outputs[i]) {
char out_var[255]; char out_var[255];
if (ctx->so->output[i].need_temp || ctx->prog_type == TGSI_PROCESSOR_GEOMETRY
|| output->glsl_predefined_no_emit) {
get_so_name(ctx, false, output, ctx->so->output[i].register_index, out_var, writemask); get_so_name(ctx, false, output, ctx->so->output[i].register_index, out_var, writemask);
emit_buff(ctx, "tfout%d = %s(%s%s);\n", i, outtype, out_var, writemask); emit_buff(ctx, "tfout%d = %s(%s);\n", i, outtype, out_var);
} else {
get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, writemask);
ctx->so_names[i] = strdup(out_var);
}
} }
} }
} }
@ -5171,14 +5175,23 @@ static void emit_ios_streamout(struct dump_ctx *ctx)
snprintf(outtype, 6, "float"); snprintf(outtype, 6, "float");
else else
snprintf(outtype, 6, "vec%d", ctx->so->output[i].num_components); snprintf(outtype, 6, "vec%d", ctx->so->output[i].num_components);
if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY)
emit_hdrf(ctx, "layout (stream=%d) out %s tfout%d;\n", ctx->so->output[i].stream, outtype, i);
else {
const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs,
ctx->so->output[i].register_index);
if (ctx->so->output[i].need_temp || output->name == TGSI_SEMANTIC_CLIPDIST ||
output->glsl_predefined_no_emit) {
if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL) if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL)
emit_hdrf(ctx, "out %s tfout%d[];\n", outtype, i); emit_hdrf(ctx, "out %s tfout%d[];\n", outtype, i);
else if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY)
emit_hdrf(ctx, "layout (stream=%d) out %s tfout%d;\n", ctx->so->output[i].stream, outtype, i);
else else
emit_hdrf(ctx, "out %s tfout%d;\n", outtype, i); emit_hdrf(ctx, "out %s tfout%d;\n", outtype, i);
} }
} }
}
}
} }
static inline void emit_winsys_correction(struct dump_ctx *ctx) static inline void emit_winsys_correction(struct dump_ctx *ctx)

Loading…
Cancel
Save