From a861e9dff40ffea1ad5e6fc49b3f7ff6852d3a95 Mon Sep 17 00:00:00 2001 From: Lepton Wu Date: Tue, 19 May 2020 16:25:59 -0700 Subject: [PATCH] shader: Use integer type for ARM (MALI GPU) Currently, even the input/output buffers are integer formats, we still use float type for them. This is actually undefined behavior and MALI GPU will do type conversion and cause unexpected results. To fix this, we check the input buffer format for vertex shader and also color buffer format for fragment shader, if they are integer types, just use integer types in generated shaders. Since the old way works fine on other GPU, only enable this fix for ARM MALI for now. The plan is to enable this for other GPU also with auto detection. Signed-off-by: Lepton Wu Reviewed-by: Gert Wollny --- src/vrend_renderer.c | 53 +++++++++++++++++++++++++++++-- src/vrend_shader.c | 75 +++++++++++++++++++++++++++++++++++++++----- src/vrend_shader.h | 5 +++ 3 files changed, 124 insertions(+), 9 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 33266b1..89ef2a2 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -291,6 +291,7 @@ struct global_renderer_state { bool use_gles; bool use_core_profile; bool use_external_blob; + bool use_integer; bool features[feat_last]; @@ -492,6 +493,8 @@ struct vrend_vertex_element_array { unsigned count; struct vrend_vertex_element elements[PIPE_MAX_ATTRIBS]; GLuint id; + uint32_t signed_int_bitmask; + uint32_t unsigned_int_bitmask; }; struct vrend_constants { @@ -657,6 +660,7 @@ struct vrend_context { bool in_error; bool ctx_switch_pending; bool pstip_inited; + bool drawing; GLuint pstipple_tex_id; @@ -2566,6 +2570,18 @@ void vrend_set_viewport_states(struct vrend_context *ctx, } } +static void update_int_sign_masks(enum pipe_format fmt, int i, + uint32_t *signed_mask, + uint32_t *unsigned_mask) { + if (vrend_state.use_integer && + util_format_is_pure_integer(fmt)) { + if (util_format_is_pure_uint(fmt)) + (*unsigned_mask) |= (1 << i); + else + (*signed_mask) |= (1 << i); + } +} + int vrend_create_vertex_elements_state(struct vrend_context *ctx, uint32_t handle, unsigned num_elements, @@ -2654,8 +2670,12 @@ int vrend_create_vertex_elements_state(struct vrend_context *ctx, for (i = 0; i < num_elements; i++) { struct vrend_vertex_element *ve = &v->elements[i]; - if (util_format_is_pure_integer(ve->base.src_format)) + if (util_format_is_pure_integer(ve->base.src_format)) { + update_int_sign_masks(ve->base.src_format, i, + &v->signed_int_bitmask, + &v->unsigned_int_bitmask); glVertexAttribIFormat(i, ve->nr_chan, ve->type, ve->base.src_offset); + } else glVertexAttribFormat(i, ve->nr_chan, ve->type, ve->norm, ve->base.src_offset); glVertexAttribBinding(i, ve->base.vertex_buffer_index); @@ -3125,13 +3145,22 @@ static inline void vrend_fill_shader_key(struct vrend_context *ctx, int i; bool add_alpha_test = true; key->cbufs_are_a8_bitmask = 0; + // Only use integer info when drawing to avoid stale info. + if (vrend_state.use_integer && ctx->drawing) { + key->attrib_signed_int_bitmask = ctx->sub->ve->signed_int_bitmask; + key->attrib_unsigned_int_bitmask = ctx->sub->ve->unsigned_int_bitmask; + } for (i = 0; i < ctx->sub->nr_cbufs; i++) { if (!ctx->sub->surf[i]) continue; if (vrend_format_is_emulated_alpha(ctx->sub->surf[i]->format)) key->cbufs_are_a8_bitmask |= (1 << i); - if (util_format_is_pure_integer(ctx->sub->surf[i]->format)) + if (util_format_is_pure_integer(ctx->sub->surf[i]->format)) { add_alpha_test = false; + update_int_sign_masks(ctx->sub->surf[i]->format, i, + &key->cbufs_signed_int_bitmask, + &key->cbufs_unsigned_int_bitmask); + } key->surface_component_bits[i] = util_format_get_component_bits(ctx->sub->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); } if (add_alpha_test) { @@ -4377,7 +4406,14 @@ int vrend_draw_vbo(struct vrend_context *ctx, return 0; } + // For some GPU, we'd like to use integer variable in generated GLSL if + // the input buffers are integer formats. But we actually don't know the + // buffer formats when the shader is created, we only know it here. + // Set it to true so the underlying code knows to use the buffer formats + // now. + ctx->drawing = true; vrend_shader_select(ctx, ctx->sub->shaders[PIPE_SHADER_VERTEX], &vs_dirty); + ctx->drawing = false; if (ctx->sub->shaders[PIPE_SHADER_TESS_CTRL] && ctx->sub->shaders[PIPE_SHADER_TESS_CTRL]->tokens) vrend_shader_select(ctx, ctx->sub->shaders[PIPE_SHADER_TESS_CTRL], &tcs_dirty); @@ -5893,6 +5929,16 @@ vrend_renderer_get_pipe_callbacks(void) return &callbacks; } +static bool use_integer() { + if (getenv("VIRGL_USE_INTEGER")) + return true; + + const char * a = (const char *) glGetString(GL_VENDOR); + if (strcmp(a, "ARM") == 0) + return true; + return false; +} + int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags) { bool gles; @@ -5953,6 +5999,8 @@ int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags) vrend_printf( "gl_version %d - compat profile\n", gl_ver); } + vrend_state.use_integer = use_integer(); + init_features(gles ? 0 : gl_ver, gles ? gl_ver : 0); @@ -6188,6 +6236,7 @@ struct vrend_context *vrend_create_context(int id, uint32_t nlen, const char *de grctx->shader_cfg.has_gpu_shader5 = has_feature(feat_gpu_shader5); grctx->shader_cfg.has_es31_compat = has_feature(feat_gles31_compatibility); grctx->shader_cfg.has_conservative_depth = has_feature(feat_conservative_depth); + grctx->shader_cfg.use_integer = vrend_state.use_integer; vrend_renderer_create_sub_ctx(grctx, 0); vrend_renderer_set_sub_ctx(grctx, 0); diff --git a/src/vrend_shader.c b/src/vrend_shader.c index e20da4b..1396ce6 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -80,6 +80,12 @@ #define FRONT_COLOR_EMITTED (1 << 0) #define BACK_COLOR_EMITTED (1 << 1); +enum vec_type { + VEC_FLOAT = 0, + VEC_INT = 1, + VEC_UINT = 2 +}; + struct vrend_shader_io { unsigned name; unsigned gpr; @@ -101,6 +107,7 @@ struct vrend_shader_io { bool glsl_gl_block; bool override_no_wm; bool is_int; + enum vec_type type; bool fbfetch_used; char glsl_name[128]; unsigned stream; @@ -912,6 +919,17 @@ static bool logiop_require_inout(const struct vrend_shader_key *key) } } +static enum vec_type get_type(uint32_t signed_int_mask, + uint32_t unsigned_int_mask, + int bit) { + if (signed_int_mask & (1 << bit)) + return VEC_INT; + else if (unsigned_int_mask & (1 << bit)) + return VEC_UINT; + else + return VEC_FLOAT; +} + static boolean iter_declaration(struct tgsi_iterate_context *iter, struct tgsi_full_declaration *decl ) @@ -941,6 +959,9 @@ iter_declaration(struct tgsi_iterate_context *iter, } if (iter->processor.Processor == TGSI_PROCESSOR_VERTEX) { ctx->attrib_input_mask |= (1 << decl->Range.First); + ctx->inputs[i].type = get_type(ctx->key->attrib_signed_int_bitmask, + ctx->key->attrib_unsigned_int_bitmask, + decl->Range.First); } ctx->inputs[i].name = decl->Semantic.Name; ctx->inputs[i].sid = decl->Semantic.Index; @@ -1291,6 +1312,12 @@ iter_declaration(struct tgsi_iterate_context *iter, } break; case TGSI_SEMANTIC_COLOR: + if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + ctx->outputs[i].type = get_type(ctx->key->cbufs_signed_int_bitmask, + ctx->key->cbufs_unsigned_int_bitmask, + ctx->outputs[i].sid); + } + if (iter->processor.Processor == TGSI_PROCESSOR_VERTEX) { if (ctx->glsl_ver_required < 140) { ctx->outputs[i].glsl_no_index = true; @@ -3605,6 +3632,16 @@ get_destination_info(struct dump_ctx *ctx, dinfo->dtypeprefix = FLOAT_BITS_TO_INT; dinfo->dstconv = INT; } + else if (ctx->outputs[j].type == VEC_UINT) { + if (dinfo->dtypeprefix == TYPE_CONVERSION_NONE) + dinfo->dtypeprefix = FLOAT_BITS_TO_UINT; + dinfo->dstconv = dinfo->udstconv; + } + else if (ctx->outputs[j].type == VEC_INT) { + if (dinfo->dtypeprefix == TYPE_CONVERSION_NONE) + dinfo->dtypeprefix = FLOAT_BITS_TO_INT; + dinfo->dstconv = dinfo->idstconv; + } if (ctx->outputs[j].name == TGSI_SEMANTIC_PSIZE) { dinfo->dstconv = FLOAT; break; @@ -3901,6 +3938,16 @@ get_source_info(struct dump_ctx *ctx, if ((stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_SIGNED) && ctx->inputs[j].is_int) srcstypeprefix = TYPE_CONVERSION_NONE; + else if (ctx->inputs[j].type) { + if (stype == TGSI_TYPE_UNSIGNED) + srcstypeprefix = UVEC4; + else if (stype == TGSI_TYPE_SIGNED) + srcstypeprefix = IVEC4; + else if (ctx->inputs[j].type == VEC_INT) + srcstypeprefix = INT_BITS_TO_FLOAT; + else // ctx->inputs[j].type == VEC_UINT + srcstypeprefix = UINT_BITS_TO_FLOAT; + } if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1) { strbuf_fmt(src_buf, "floatBitsToInt(%s%s%s%s)", prefix, ctx->inputs[j].glsl_name, arrayname, swizzle); @@ -5943,8 +5990,13 @@ emit_ios_generic(struct dump_ctx *ctx, enum io_type iot, const char *prefix, const struct vrend_shader_io *io, const char *inout, const char *postfix) { - const char type[4][6] = {"float", " vec2", " vec3", " vec4"}; - const char *t = " vec4"; + const char *atype[3][4] = { + {"float", " vec2", " vec3", " vec4"}, + {" int", "ivec2", "ivec3", "ivec4"}, + {" uint", "uvec2", "uvec3", "uvec4"}, + }; + const char **type = atype[io->type]; + const char *t = type[3]; char layout[128] = ""; @@ -6113,7 +6165,9 @@ static void emit_ios_vs(struct dump_ctx *ctx) } if (ctx->inputs[i].first != ctx->inputs[i].last) snprintf(postfix, sizeof(postfix), "[%d]", ctx->inputs[i].last - ctx->inputs[i].first + 1); - emit_hdrf(ctx, "in vec4 %s%s;\n", ctx->inputs[i].glsl_name, postfix); + const char *vtype[3] = {"vec4", "ivec4", "uvec4"}; + emit_hdrf(ctx, "in %s %s%s;\n", + vtype[ctx->inputs[i].type], ctx->inputs[i].glsl_name, postfix); } } @@ -6255,18 +6309,25 @@ static void emit_ios_fs(struct dump_ctx *ctx) } if (ctx->write_all_cbufs) { + const char* type = "vec4"; + if (ctx->key->cbufs_unsigned_int_bitmask) + type = "uvec4"; + else if (ctx->key->cbufs_signed_int_bitmask) + type = "ivec4"; + for (i = 0; i < (uint32_t)ctx->cfg->max_draw_buffers; i++) { if (ctx->cfg->use_gles) { if (ctx->key->fs_logicop_enabled) - emit_hdrf(ctx, "vec4 fsout_tmp_c%d;\n", i); + emit_hdrf(ctx, "%s fsout_tmp_c%d;\n", type, i); if (logiop_require_inout(ctx->key)) { const char *noncoherent = ctx->key->fs_logicop_emulate_coherent ? "" : ", noncoherent"; - emit_hdrf(ctx, "layout (location=%d%s) inout highp vec4 fsout_c%d;\n", i, noncoherent, i); + emit_hdrf(ctx, "layout (location=%d%s) inout highp %s fsout_c%d;\n", i, noncoherent, type, i); } else - emit_hdrf(ctx, "layout (location=%d) out vec4 fsout_c%d;\n", i, i); + emit_hdrf(ctx, "layout (location=%d) out %s fsout_c%d;\n", i, + type, i); } else - emit_hdrf(ctx, "out vec4 fsout_c%d;\n", i); + emit_hdrf(ctx, "out %s fsout_c%d;\n", type, i); } } else { for (i = 0; i < ctx->num_outputs; i++) { diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 71aefe8..284cf13 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -142,6 +142,10 @@ struct vrend_shader_key { uint8_t prev_stage_num_cull_out; bool next_stage_pervertex_in; uint32_t cbufs_are_a8_bitmask; + uint32_t cbufs_signed_int_bitmask; + uint32_t cbufs_unsigned_int_bitmask; + uint32_t attrib_signed_int_bitmask; + uint32_t attrib_unsigned_int_bitmask; uint8_t num_indirect_generic_outputs; uint8_t num_indirect_patch_outputs; uint8_t num_indirect_generic_inputs; @@ -164,6 +168,7 @@ struct vrend_shader_cfg { bool has_gpu_shader5; bool has_es31_compat; bool has_conservative_depth; + bool use_integer; }; struct vrend_context;