shader: use varying outputs directly for TF when possible

Some varying outputs can be used directly for transform feedback, so don't emit an additional varying in these cases. This should save a move instruction and also reduce the possibility of hitting the output varying limit. Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-By: Gurchetan Singh <gurchetansingh@chromium.org>
6 years ago · 9157dcbca0
parent 3c195a9a36
commit 9157dcbca0
3 changed files with 32 additions and 10 deletions
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -208,6 +208,7 @@ struct pipe_stream_output_info
      unsigned output_buffer:3;   /**< 0 to PIPE_MAX_SO_BUFFERS */
      unsigned dst_offset:16;     /**< offset into the buffer in dwords */
      unsigned stream:2;
+      unsigned need_temp:1;
   } output[PIPE_MAX_SO_OUTPUTS];
 };

--- a/src/vrend_decode.c
+++ b/src/vrend_decode.c
@@ -109,6 +109,14 @@ static int vrend_decode_create_shader(struct vrend_decode_ctx *ctx,
            so_info.output[i].dst_offset = (tmp >> 16) & 0xffff;
            tmp = get_buf_entry(ctx, VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(i));
            so_info.output[i].stream = (tmp & 0x3);
+            so_info.output[i].need_temp = so_info.output[i].num_components < 4;
+         }
+
+         for (i = 0; i < so_info.num_outputs - 1; i++) {
+            for (unsigned j = i + 1; j < so_info.num_outputs; j++) {
+               so_info.output[j].need_temp |=
+                     (so_info.output[i].register_index == so_info.output[j].register_index);
+            }
         }
      }
      shader_offset += 4 + (2 * num_so_outputs);
--- a/src/vrend_shader.c
+++ b/src/vrend_shader.c
@@ -1705,8 +1705,6 @@ static void emit_so_movs(struct dump_ctx *ctx)
      return;
   }

-   const char *stage_prefix = get_stage_output_name_prefix(ctx->prog_type);
-
   for (i = 0; i < ctx->so->num_outputs; i++) {
      const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs, ctx->so->output[i].register_index);
      if (ctx->so->output[i].start_component != 0) {
@@ -1735,7 +1733,7 @@ static void emit_so_movs(struct dump_ctx *ctx)
            ctx->has_clipvertex_so = true;
         } else {
            char out_var[255];
-            get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, writemask);
+            get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, "");
            ctx->so_names[i] = strdup(out_var);
         }
      } else {
@@ -1765,8 +1763,14 @@ static void emit_so_movs(struct dump_ctx *ctx)
      } else {
         if (ctx->write_so_outputs[i]) {
            char out_var[255];
-            get_so_name(ctx, false, output, ctx->so->output[i].register_index, out_var, writemask);
-            emit_buff(ctx, "tfout%d = %s(%s%s);\n", i, outtype, out_var, writemask);
+            if (ctx->so->output[i].need_temp || ctx->prog_type == TGSI_PROCESSOR_GEOMETRY
+                || output->glsl_predefined_no_emit) {
+               get_so_name(ctx, false, output, ctx->so->output[i].register_index, out_var, writemask);
+               emit_buff(ctx, "tfout%d = %s(%s);\n", i, outtype, out_var);
+            } else {
+               get_so_name(ctx, true, output, ctx->so->output[i].register_index, out_var, writemask);
+               ctx->so_names[i] = strdup(out_var);
+            }
         }
      }
   }
@@ -5171,12 +5175,21 @@ static void emit_ios_streamout(struct dump_ctx *ctx)
            snprintf(outtype, 6, "float");
         else
            snprintf(outtype, 6, "vec%d", ctx->so->output[i].num_components);
-	 if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL)
-            emit_hdrf(ctx, "out %s tfout%d[];\n", outtype, i);
-         else if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY)
+
+         if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY)
            emit_hdrf(ctx, "layout (stream=%d) out %s tfout%d;\n", ctx->so->output[i].stream, outtype, i);
-         else
-            emit_hdrf(ctx, "out %s tfout%d;\n", outtype, i);
+         else  {
+            const struct vrend_shader_io *output = get_io_slot(&ctx->outputs[0], ctx->num_outputs,
+                  ctx->so->output[i].register_index);
+            if (ctx->so->output[i].need_temp || output->name == TGSI_SEMANTIC_CLIPDIST ||
+                output->glsl_predefined_no_emit) {
+
+               if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL)
+                  emit_hdrf(ctx, "out %s tfout%d[];\n", outtype, i);
+               else
+                  emit_hdrf(ctx, "out %s tfout%d;\n", outtype, i);
+            }
+         }
      }
   }
 }