From 00093d0a8e7cd98d5d1208e4917d333ecb381f7c Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 15 Jan 2021 14:57:36 +0100 Subject: [PATCH] vrend: use an array of program lists and merge dual_src into compare value With that Unigine Heaven goes from around 56 FPS to 58 Signed-off-by: Gert Wollny Reviewed-by: Chia-I Wu --- src/vrend_renderer.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index eb096ef..4e634a2 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -540,6 +540,9 @@ struct vrend_vertex_buffer { uint32_t res_id; }; +#define VREND_PROGRAM_NQUEUES (1 << 8) +#define VREND_PROGRAM_NQUEUE_MASK (VREND_PROGRAM_NQUEUES - 1) + struct vrend_sub_context { struct list_head head; @@ -550,7 +553,11 @@ struct vrend_sub_context { GLuint vaoid; uint32_t enabled_attribs_bitmask; - struct list_head gl_programs; + /* Using an array of lists only adds VREND_PROGRAM_NQUEUES - 1 list_head + * structures to the consumed memory, but looking up the program can + * be spead up by the factor VREND_PROGRAM_NQUEUES which makes this + * worthwile. */ + struct list_head gl_programs[VREND_PROGRAM_NQUEUES]; struct list_head cs_programs; struct util_hash_table *object_hash; @@ -1591,7 +1598,9 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte sprog->ss[PIPE_SHADER_VERTEX] = vs; sprog->ss[PIPE_SHADER_FRAGMENT] = fs; - sprog->vs_fs_key = (uint64_t)vs->id << 32 | fs->id; + sprog->vs_fs_key = (((uint64_t)fs->id) << 32) | (vs->id & ~VREND_PROGRAM_NQUEUE_MASK) | + (sprog->dual_src_linked ? 1 : 0); + sprog->ss[PIPE_SHADER_GEOMETRY] = gs; sprog->ss[PIPE_SHADER_TESS_CTRL] = tcs; sprog->ss[PIPE_SHADER_TESS_EVAL] = tes; @@ -1608,7 +1617,7 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte last_shader = tes ? PIPE_SHADER_TESS_EVAL : (gs ? PIPE_SHADER_GEOMETRY : PIPE_SHADER_FRAGMENT); sprog->id = prog_id; - list_addtail(&sprog->head, &ctx->sub->gl_programs); + list_addtail(&sprog->head, &ctx->sub->gl_programs[vs->id & VREND_PROGRAM_NQUEUE_MASK]); if (fs->key.pstipple_tex) sprog->fs_stipple_loc = glGetUniformLocation(prog_id, "pstipple_sampler"); @@ -1678,12 +1687,13 @@ static struct vrend_linked_shader_program *lookup_shader_program(struct vrend_co GLuint tes_id, bool dual_src) { - uint64_t vs_fs_key = (((uint64_t)vs_id) << 32) | fs_id; + uint64_t vs_fs_key = (((uint64_t)fs_id) << 32) | (vs_id & ~VREND_PROGRAM_NQUEUE_MASK) | + (dual_src ? 1 : 0); struct vrend_linked_shader_program *ent; - LIST_FOR_EACH_ENTRY(ent, &ctx->sub->gl_programs, head) { - if (ent->dual_src_linked != dual_src) - continue; + + struct list_head *programs = &ctx->sub->gl_programs[vs_id & VREND_PROGRAM_NQUEUE_MASK]; + LIST_FOR_EACH_ENTRY(ent, programs, head) { if (likely(ent->vs_fs_key != vs_fs_key)) continue; if (ent->ss[PIPE_SHADER_GEOMETRY] && @@ -1697,7 +1707,7 @@ static struct vrend_linked_shader_program *lookup_shader_program(struct vrend_co continue; /* put the entry in front */ list_del(&ent->head); - list_add(&ent->head, &ctx->sub->gl_programs); + list_add(&ent->head, programs); return ent; } @@ -1730,15 +1740,16 @@ static void vrend_free_programs(struct vrend_sub_context *sub) { struct vrend_linked_shader_program *ent, *tmp; - if (LIST_IS_EMPTY(&sub->gl_programs) && LIST_IS_EMPTY(&sub->cs_programs)) - return; - - LIST_FOR_EACH_ENTRY_SAFE(ent, tmp, &sub->gl_programs, head) { - vrend_destroy_program(ent); + if (!LIST_IS_EMPTY(&sub->cs_programs)) { + LIST_FOR_EACH_ENTRY_SAFE(ent, tmp, &sub->cs_programs, head) + vrend_destroy_program(ent); } - LIST_FOR_EACH_ENTRY_SAFE(ent, tmp, &sub->cs_programs, head) { - vrend_destroy_program(ent); + for (unsigned i = 0; i < VREND_PROGRAM_NQUEUES; ++i) { + if (!LIST_IS_EMPTY(&sub->gl_programs[i])) { + LIST_FOR_EACH_ENTRY_SAFE(ent, tmp, &sub->gl_programs[i], head) + vrend_destroy_program(ent); + } } } @@ -10507,7 +10518,8 @@ void vrend_renderer_create_sub_ctx(struct vrend_context *ctx, int sub_ctx_id) glBindFramebuffer(GL_FRAMEBUFFER, sub->fb_id); glGenFramebuffers(2, sub->blit_fb_ids); - list_inithead(&sub->gl_programs); + for (int i = 0; i < VREND_PROGRAM_NQUEUES; ++i) + list_inithead(&sub->gl_programs[i]); list_inithead(&sub->cs_programs); list_inithead(&sub->streamout_list);