diff --git a/src/Makefile.am b/src/Makefile.am index c7c0943..9d5fd32 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,5 @@ SUBDIRS := gallium/auxiliary -AM_LDFLAGS = gallium/auxiliary/libgallium.la $(EPOXY_LIBS) $(GL_LIBS) -lgbm -lm -ldl -I./gallium/include +AM_LDFLAGS = $(EPOXY_LIBS) $(GL_LIBS) -lgbm -lm -ldl -I./gallium/include AM_CFLAGS = \ -I$(top_srcdir)/src/gallium/drivers/virgl \ @@ -31,7 +31,7 @@ GM_LDFLAGS = -Wl,-Bsymbolic -version-number 0:1 -no-undefined libvirglrenderer_la_SOURCES = virglrenderer.c libvirglrenderer_ladir = $(libdir) -libvirglrenderer_la_LIBADD = libvrend.la +libvirglrenderer_la_LIBADD = libvrend.la gallium/auxiliary/libgallium.la libvirglrenderer_la_LDFLAGS = $(GM_LDFLAGS) $(EPOXY_LDFLAGS) libvirglrendererincludedir = ${includedir} diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index fd2c2d3..04a779e 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -1,4 +1,6 @@ C_SOURCES := \ + cso_cache/cso_cache.c \ + cso_cache/cso_hash.c \ os/os_misc.c \ os/os_process.c \ os/os_time.c \ @@ -8,16 +10,29 @@ C_SOURCES := \ tgsi/tgsi_info.c \ tgsi/tgsi_iterate.c \ tgsi/tgsi_parse.c \ + tgsi/tgsi_sanity.c \ tgsi/tgsi_scan.c \ tgsi/tgsi_strings.c \ tgsi/tgsi_text.c \ tgsi/tgsi_transform.c \ tgsi/tgsi_ureg.c \ tgsi/tgsi_util.c \ + util/u_debug.c \ + util/u_format.c \ util/u_format_srgb.c \ util/u_format_table.c \ + util/u_format_etc.c \ + util/u_format_rgtc.c \ + util/u_format_latc.c \ + util/u_format_s3tc.c \ + util/u_format_yuv.c \ + util/u_format_zs.c \ + util/u_format_other.c \ util/u_hash_table.c \ - util/u_texture.c - util/u_bitmask.c + util/u_texture.c \ + util/u_bitmask.c \ + util/u_cpu_detect.c \ + util/u_surface.c \ + util/u_math.c diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c new file mode 100644 index 0000000..dd56e4a --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -0,0 +1,322 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin + */ + +#include "util/u_debug.h" + +#include "util/u_memory.h" + +#include "cso_cache.h" +#include "cso_hash.h" + + +struct cso_cache { + struct cso_hash *hashes[CSO_CACHE_MAX]; + int max_size; + + cso_sanitize_callback sanitize_cb; + void *sanitize_data; +}; + +#if 1 +static unsigned hash_key(const void *key, unsigned key_size) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} +#else +static unsigned hash_key(const unsigned char *p, int n) +{ + unsigned h = 0; + unsigned g; + + while (n--) { + h = (h << 4) + *p++; + if ((g = (h & 0xf0000000)) != 0) + h ^= g >> 23; + h &= ~g; + } + return h; +} +#endif + +unsigned cso_construct_key(void *item, int item_size) +{ + return hash_key((item), item_size); +} + +static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) +{ + struct cso_hash *hash; + hash = sc->hashes[type]; + return hash; +} + +static void delete_blend_state(void *state, void *data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_depth_stencil_state(void *state, void *data) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_sampler_state(void *state, void *data) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_rasterizer_state(void *state, void *data) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_velements(void *state, void *data) +{ + struct cso_velements *cso = (struct cso_velements *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static INLINE void delete_cso(void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: + delete_blend_state(state, 0); + break; + case CSO_SAMPLER: + delete_sampler_state(state, 0); + break; + case CSO_DEPTH_STENCIL_ALPHA: + delete_depth_stencil_state(state, 0); + break; + case CSO_RASTERIZER: + delete_rasterizer_state(state, 0); + break; + case CSO_VELEMENTS: + delete_velements(state, 0); + break; + default: + assert(0); + FREE(state); + } +} + + +static INLINE void sanitize_hash(struct cso_cache *sc, + struct cso_hash *hash, + enum cso_cache_type type, + int max_size) +{ + if (sc->sanitize_cb) + sc->sanitize_cb(hash, type, max_size, sc->sanitize_data); +} + + +static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) +{ + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every subsequent call will go through the same */ + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; + int to_remove = (max_size < max_entries) * max_entries/4; + if (hash_size > max_size) + to_remove += hash_size - max_size; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + struct cso_hash_iter iter = cso_hash_first_node(hash); + void *cso = cso_hash_take(hash, cso_hash_iter_key(iter)); + delete_cso(cso, type); + --to_remove; + } +} + +struct cso_hash_iter +cso_insert_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *state) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + sanitize_hash(sc, hash, type, sc->max_size); + + return cso_hash_insert(hash, hash_key, state); +} + +struct cso_hash_iter +cso_find_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + + return cso_hash_find(hash, hash_key); +} + + +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ) +{ + struct cso_hash_iter iter = cso_hash_find(hash, hash_key); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) { + /* We found a match + */ + return iter_data; + } + iter = cso_hash_iter_next(iter); + } + return NULL; +} + + +struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *templ, unsigned size) +{ + struct cso_hash_iter iter = cso_find_state(sc, hash_key, type); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) + return iter; + iter = cso_hash_iter_next(iter); + } + return iter; +} + +void * cso_take_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + return cso_hash_take(hash, hash_key); +} + +struct cso_cache *cso_cache_create(void) +{ + struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + int i; + if (sc == NULL) + return NULL; + + sc->max_size = 4096; + for (i = 0; i < CSO_CACHE_MAX; i++) + sc->hashes[i] = cso_hash_create(); + + sc->sanitize_cb = sanitize_cb; + sc->sanitize_data = 0; + + return sc; +} + +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + struct cso_hash_iter iter; + + iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + void *state = cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + func(state, user_data); + } + } +} + +void cso_cache_delete(struct cso_cache *sc) +{ + int i; + assert(sc); + + if (!sc) + return; + + /* delete driver data */ + cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0); + cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0); + cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); + cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0); + + for (i = 0; i < CSO_CACHE_MAX; i++) + cso_hash_delete(sc->hashes[i]); + + FREE(sc); +} + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number) +{ + int i; + + sc->max_size = number; + + for (i = 0; i < CSO_CACHE_MAX; i++) + sanitize_hash(sc, sc->hashes[i], i, sc->max_size); +} + +int cso_maximum_cache_size(const struct cso_cache *sc) +{ + return sc->max_size; +} + +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data) +{ + sc->sanitize_cb = cb; + sc->sanitize_data = user_data; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h new file mode 100644 index 0000000..052245f --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2007-2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /** + * @file + * Constant State Object (CSO) cache. + * + * The basic idea is that the states are created via the + * create_state/bind_state/delete_state semantics. The driver is expected to + * perform as much of the Gallium state translation to whatever its internal + * representation is during the create call. Gallium then has a caching + * mechanism where it stores the created states. When the pipeline needs an + * actual state change, a bind call is issued. In the bind call the driver + * gets its already translated representation. + * + * Those semantics mean that the driver doesn't do the repeated translations + * of states on every frame, but only once, when a new state is actually + * created. + * + * Even on hardware that doesn't do any kind of state cache, it makes the + * driver look a lot neater, plus it avoids all the redundant state + * translations on every frame. + * + * Currently our constant state objects are: + * - alpha test + * - blend + * - depth stencil + * - fragment shader + * - rasterizer (old setup) + * - sampler + * - vertex shader + * - vertex elements + * + * Things that are not constant state objects include: + * - blend_color + * - clip_state + * - clear_color_state + * - constant_buffer + * - feedback_state + * - framebuffer_state + * - polygon_stipple + * - scissor_state + * - texture_state + * - viewport_state + * + * @author Zack Rusin + */ + +#ifndef CSO_CACHE_H +#define CSO_CACHE_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures + * returned by value to be fully defined */ +#include "cso_hash.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +enum cso_cache_type { + CSO_RASTERIZER, + CSO_BLEND, + CSO_DEPTH_STENCIL_ALPHA, + CSO_SAMPLER, + CSO_VELEMENTS, + CSO_CACHE_MAX, +}; + +typedef void (*cso_state_callback)(void *ctx, void *obj); + +typedef void (*cso_sanitize_callback)(struct cso_hash *hash, + enum cso_cache_type type, + int max_size, + void *user_data); + +struct cso_cache; + +struct cso_blend { + struct pipe_blend_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_depth_stencil_alpha { + struct pipe_depth_stencil_alpha_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_rasterizer { + struct pipe_rasterizer_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_sampler { + struct pipe_sampler_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_velems_state { + unsigned count; + struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS]; +}; + +struct cso_velements { + struct cso_velems_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +unsigned cso_construct_key(void *item, int item_size); + +struct cso_cache *cso_cache_create(void); +void cso_cache_delete(struct cso_cache *sc); + +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data); + +struct cso_hash_iter cso_insert_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *state); +struct cso_hash_iter cso_find_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type); +struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *templ, unsigned size); +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data); +void * cso_take_state(struct cso_cache *sc, unsigned hash_key, + enum cso_cache_type type); + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number); +int cso_maximum_cache_size(const struct cso_cache *sc); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 7621b6a..39a4296 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -200,7 +200,7 @@ tgsi_default_declaration_interp( void ) struct tgsi_declaration_interp di; di.Interpolate = TGSI_INTERPOLATE_CONSTANT; - di.Centroid = 0; + di.Location = TGSI_INTERPOLATE_LOC_CENTER; di.CylindricalWrap = 0; di.Padding = 0; @@ -209,7 +209,7 @@ tgsi_default_declaration_interp( void ) static struct tgsi_declaration_interp tgsi_build_declaration_interp(unsigned interpolate, - unsigned centroid, + unsigned interpolate_location, unsigned cylindrical_wrap, struct tgsi_declaration *declaration, struct tgsi_header *header) @@ -217,7 +217,7 @@ tgsi_build_declaration_interp(unsigned interpolate, struct tgsi_declaration_interp di; di.Interpolate = interpolate; - di.Centroid = centroid; + di.Location = interpolate_location; di.CylindricalWrap = cylindrical_wrap; di.Padding = 0; @@ -297,10 +297,10 @@ tgsi_default_declaration_sampler_view(void) struct tgsi_declaration_sampler_view dsv; dsv.Resource = TGSI_TEXTURE_BUFFER; - dsv.ReturnTypeX = PIPE_TYPE_UNORM; - dsv.ReturnTypeY = PIPE_TYPE_UNORM; - dsv.ReturnTypeZ = PIPE_TYPE_UNORM; - dsv.ReturnTypeW = PIPE_TYPE_UNORM; + dsv.ReturnTypeX = TGSI_RETURN_TYPE_UNORM; + dsv.ReturnTypeY = TGSI_RETURN_TYPE_UNORM; + dsv.ReturnTypeZ = TGSI_RETURN_TYPE_UNORM; + dsv.ReturnTypeW = TGSI_RETURN_TYPE_UNORM; return dsv; } @@ -433,7 +433,7 @@ tgsi_build_full_declaration( size++; *di = tgsi_build_declaration_interp(full_decl->Interp.Interpolate, - full_decl->Interp.Centroid, + full_decl->Interp.Location, full_decl->Interp.CylindricalWrap, declaration, header); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index b2e30d2..972a37e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -29,7 +29,6 @@ #include "util/u_string.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_math.h" #include "tgsi_dump.h" #include "tgsi_info.h" #include "tgsi_iterate.h" @@ -44,8 +43,6 @@ struct dump_ctx { struct tgsi_iterate_context iter; - boolean dump_float_as_hex; - uint instno; uint immno; int indent; @@ -85,8 +82,7 @@ dump_enum( #define UID(I) ctx->dump_printf( ctx, "%u", I ) #define INSTID(I) ctx->dump_printf( ctx, "% 3u", I ) #define SID(I) ctx->dump_printf( ctx, "%d", I ) -#define FLT(F) ctx->dump_printf( ctx, "%10.8f", F ) -#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) ) +#define FLT(F) ctx->dump_printf( ctx, "%10.4f", F ) #define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) const char * @@ -243,10 +239,7 @@ dump_imm_data(struct tgsi_iterate_context *iter, for (i = 0; i < num_tokens; i++) { switch (data_type) { case TGSI_IMM_FLOAT32: - if (ctx->dump_float_as_hex) - HFLT( data[i].Float ); - else - FLT( data[i].Float ); + FLT( data[i].Float ); break; case TGSI_IMM_UINT32: UID(data[i].Uint); @@ -336,15 +329,15 @@ iter_declaration( if ((decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeY) && (decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeZ) && (decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeW)) { - ENM(decl->SamplerView.ReturnTypeX, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeX, tgsi_return_type_names); } else { - ENM(decl->SamplerView.ReturnTypeX, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeX, tgsi_return_type_names); TXT(", "); - ENM(decl->SamplerView.ReturnTypeY, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeY, tgsi_return_type_names); TXT(", "); - ENM(decl->SamplerView.ReturnTypeZ, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeZ, tgsi_return_type_names); TXT(", "); - ENM(decl->SamplerView.ReturnTypeW, tgsi_type_names); + ENM(decl->SamplerView.ReturnTypeW, tgsi_return_type_names); } } @@ -356,8 +349,9 @@ iter_declaration( ENM( decl->Interp.Interpolate, tgsi_interpolate_names ); } - if (decl->Interp.Centroid) { - TXT( ", CENTROID" ); + if (decl->Interp.Location != TGSI_INTERPOLATE_LOC_CENTER) { + TXT( ", " ); + ENM( decl->Interp.Location, tgsi_interpolate_locations ); } if (decl->Interp.CylindricalWrap) { @@ -579,8 +573,11 @@ iter_instruction( } if (inst->Instruction.Texture) { - TXT( ", " ); - ENM( inst->Texture.Texture, tgsi_texture_names ); + if (!(inst->Instruction.Opcode >= TGSI_OPCODE_SAMPLE && + inst->Instruction.Opcode <= TGSI_OPCODE_GATHER4)) { + TXT( ", " ); + ENM( inst->Texture.Texture, tgsi_texture_names ); + } for (i = 0; i < inst->Texture.NumOffsets; i++) { TXT( ", " ); TXT(tgsi_file_name(inst->TexOffsets[i].File)); @@ -665,11 +662,6 @@ tgsi_dump( ctx.dump_printf = dump_ctx_printf; ctx.indentation = 0; - if (flags & TGSI_DUMP_FLOAT_AS_HEX) - ctx.dump_float_as_hex = TRUE; - else - ctx.dump_float_as_hex = FALSE; - tgsi_iterate_shader( tokens, &ctx.iter ); } @@ -731,11 +723,6 @@ tgsi_dump_str( ctx.ptr = str; ctx.left = (int)size; - if (flags & TGSI_DUMP_FLOAT_AS_HEX) - ctx.base.dump_float_as_hex = TRUE; - else - ctx.base.dump_float_as_hex = FALSE; - tgsi_iterate_shader( tokens, &ctx.base.iter ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 55da60a..834568b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -136,18 +136,6 @@ micro_cmp(union tgsi_exec_channel *dst, dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; } -static void -micro_cnd(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2) -{ - dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; - dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; - dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; - dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; -} - static void micro_cos(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -451,24 +439,6 @@ micro_sne(union tgsi_exec_channel *dst, dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; } -static void -micro_sfl(union tgsi_exec_channel *dst) -{ - dst->f[0] = 0.0f; - dst->f[1] = 0.0f; - dst->f[2] = 0.0f; - dst->f[3] = 0.0f; -} - -static void -micro_str(union tgsi_exec_channel *dst) -{ - dst->f[0] = 1.0f; - dst->f[1] = 1.0f; - dst->f[2] = 1.0f; - dst->f[3] = 1.0f; -} - static void micro_trunc(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -689,7 +659,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_vector *outputs; inputs = align_malloc(sizeof(struct tgsi_exec_vector) * - TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, + TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 16); if (!inputs) @@ -789,6 +759,11 @@ tgsi_exec_machine_bind_shader( break; case TGSI_TOKEN_TYPE_PROPERTY: + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) { + if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { + mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; + } + } break; default: @@ -823,8 +798,8 @@ tgsi_exec_machine_create( void ) mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; - mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); - mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); + mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); if (!mach->Inputs || !mach->Outputs) goto fail; @@ -905,22 +880,6 @@ micro_div( } } -static void -micro_rcc(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ - uint i; - - for (i = 0; i < 4; i++) { - float recip = 1.0f / src->f[i]; - - if (recip > 0.0f) - dst->f[i] = CLAMP(recip, 5.42101e-020f, 1.84467e+019f); - else - dst->f[i] = CLAMP(recip, -1.84467e+019f, -5.42101e-020f); - } -} - static void micro_lt( union tgsi_exec_channel *dst, @@ -1621,6 +1580,9 @@ emit_vertex(struct tgsi_exec_machine *mach) if ((mach->ExecMask & (1 << i))) */ if (mach->ExecMask) { + if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices) + return; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; } @@ -1941,7 +1903,8 @@ exec_txd(struct tgsi_exec_machine *mach, case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_CUBE_ARRAY: - /* only TEXTURE_CUBE_ARRAY actually needs W */ + case TGSI_TEXTURE_SHADOWCUBE: + /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ FETCH(&r[0], 0, TGSI_CHAN_X); FETCH(&r[1], 0, TGSI_CHAN_Y); FETCH(&r[2], 0, TGSI_CHAN_Z); @@ -1996,6 +1959,7 @@ exec_txf(struct tgsi_exec_machine *mach, case TGSI_TEXTURE_3D: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_2D_ARRAY_MSAA: IFETCH(&r[2], 0, TGSI_CHAN_Z); /* fallthrough */ case TGSI_TEXTURE_2D: @@ -2004,6 +1968,7 @@ exec_txf(struct tgsi_exec_machine *mach, case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_2D_MSAA: IFETCH(&r[1], 0, TGSI_CHAN_Y); /* fallthrough */ case TGSI_TEXTURE_BUFFER: @@ -2451,27 +2416,6 @@ exec_declaration(struct tgsi_exec_machine *mach, } } - -typedef void (* micro_op)(union tgsi_exec_channel *dst); - -static void -exec_vector(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - micro_op op, - enum tgsi_exec_datatype dst_datatype) -{ - unsigned int chan; - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - union tgsi_exec_channel dst; - - op(&dst); - store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); - } - } -} - typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src); @@ -2603,6 +2547,40 @@ exec_vector_trinary(struct tgsi_exec_machine *mach, } } +typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3); + +static void +exec_vector_quaternary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_quaternary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[4]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); + op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); + } + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + static void exec_dp3(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) @@ -2727,70 +2705,6 @@ exec_dp2(struct tgsi_exec_machine *mach, } } -static void -exec_nrm4(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned int chan; - union tgsi_exec_channel arg[4]; - union tgsi_exec_channel scale; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&scale, &arg[0], &arg[0]); - - for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { - union tgsi_exec_channel product; - - fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); - micro_mul(&product, &arg[chan], &arg[chan]); - micro_add(&scale, &scale, &product); - } - - micro_rsq(&scale, &scale); - - for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - micro_mul(&arg[chan], &arg[chan], &scale); - store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void -exec_nrm3(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { - unsigned int chan; - union tgsi_exec_channel arg[3]; - union tgsi_exec_channel scale; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&scale, &arg[0], &arg[0]); - - for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { - union tgsi_exec_channel product; - - fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); - micro_mul(&product, &arg[chan], &arg[chan]); - micro_add(&scale, &scale, &product); - } - - micro_rsq(&scale, &scale); - - for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - micro_mul(&arg[chan], &arg[chan], &scale); - store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } - } - - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - } -} - static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) @@ -2818,99 +2732,6 @@ exec_scs(struct tgsi_exec_machine *mach, } } -static void -exec_x2d(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[4]; - union tgsi_exec_channel d[2]; - - fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { - fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[2], &r[2], &r[0]); - fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[3], &r[3], &r[1]); - micro_add(&r[2], &r[2], &r[3]); - fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_add(&d[0], &r[2], &r[3]); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { - fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[2], &r[2], &r[0]); - fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[3], &r[3], &r[1]); - micro_add(&r[2], &r[2], &r[3]); - fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_add(&d[1], &r[2], &r[3]); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - } -} - -static void -exec_rfl(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[9]; - - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { - /* r0 = dp3(src0, src0) */ - fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[0], &r[2], &r[2]); - fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[8], &r[4], &r[4]); - micro_add(&r[0], &r[0], &r[8]); - fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[8], &r[6], &r[6]); - micro_add(&r[0], &r[0], &r[8]); - - /* r1 = dp3(src0, src1) */ - fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[1], &r[2], &r[3]); - fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[8], &r[4], &r[5]); - micro_add(&r[1], &r[1], &r[8]); - fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mul(&r[8], &r[6], &r[7]); - micro_add(&r[1], &r[1], &r[8]); - - /* r1 = 2 * r1 / r0 */ - micro_add(&r[1], &r[1], &r[1]); - micro_div(&r[1], &r[1], &r[0]); - - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - micro_mul(&r[2], &r[2], &r[1]); - micro_sub(&r[2], &r[2], &r[3]); - store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - micro_mul(&r[4], &r[4], &r[1]); - micro_sub(&r[4], &r[4], &r[5]); - store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - micro_mul(&r[6], &r[6], &r[1]); - micro_sub(&r[6], &r[6], &r[7]); - store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - } - } - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - } -} - static void exec_xpd(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) @@ -3298,10 +3119,10 @@ micro_idiv(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1) { - dst->i[0] = src0->i[0] / src1->i[0]; - dst->i[1] = src0->i[1] / src1->i[1]; - dst->i[2] = src0->i[2] / src1->i[2]; - dst->i[3] = src0->i[3] / src1->i[3]; + dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; + dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; + dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; + dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; } static void @@ -3570,6 +3391,119 @@ micro_ucmp(union tgsi_exec_channel *dst, dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; } +/** + * Signed bitfield extract (i.e. sign-extend the extracted bits) + */ +static void +micro_ibfe(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + int i; + for (i = 0; i < 4; i++) { + int width = src2->i[i] & 0x1f; + int offset = src1->i[i] & 0x1f; + if (width == 0) + dst->i[i] = 0; + else if (width + offset < 32) + dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); + else + dst->i[i] = src0->i[i] >> offset; + } +} + +/** + * Unsigned bitfield extract + */ +static void +micro_ubfe(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + int i; + for (i = 0; i < 4; i++) { + int width = src2->u[i] & 0x1f; + int offset = src1->u[i] & 0x1f; + if (width == 0) + dst->u[i] = 0; + else if (width + offset < 32) + dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); + else + dst->u[i] = src0->u[i] >> offset; + } +} + +/** + * Bitfield insert: copy low bits from src1 into a region of src0. + */ +static void +micro_bfi(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3) +{ + int i; + for (i = 0; i < 4; i++) { + int width = src3->u[i] & 0x1f; + int offset = src2->u[i] & 0x1f; + int bitmask = ((1 << width) - 1) << offset; + dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); + } +} + +static void +micro_brev(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = util_bitreverse(src->u[0]); + dst->u[1] = util_bitreverse(src->u[1]); + dst->u[2] = util_bitreverse(src->u[2]); + dst->u[3] = util_bitreverse(src->u[3]); +} + +static void +micro_popc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = util_bitcount(src->u[0]); + dst->u[1] = util_bitcount(src->u[1]); + dst->u[2] = util_bitcount(src->u[2]); + dst->u[3] = util_bitcount(src->u[3]); +} + +static void +micro_lsb(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = ffs(src->u[0]) - 1; + dst->i[1] = ffs(src->u[1]) - 1; + dst->i[2] = ffs(src->u[2]) - 1; + dst->i[3] = ffs(src->u[3]) - 1; +} + +static void +micro_imsb(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = util_last_bit_signed(src->i[0]) - 1; + dst->i[1] = util_last_bit_signed(src->i[1]) - 1; + dst->i[2] = util_last_bit_signed(src->i[2]) - 1; + dst->i[3] = util_last_bit_signed(src->i[3]) - 1; +} + +static void +micro_umsb(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = util_last_bit(src->u[0]) - 1; + dst->i[1] = util_last_bit(src->u[1]) - 1; + dst->i[2] = util_last_bit(src->u[2]) - 1; + dst->i[3] = util_last_bit(src->u[3]) - 1; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -3657,10 +3591,6 @@ exec_instruction( exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_CND: - exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_SQRT: exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -3705,10 +3635,6 @@ exec_instruction( exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_RCC: - exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_DPH: exec_dph(mach, inst); break; @@ -3749,18 +3675,10 @@ exec_instruction( assert (0); break; - case TGSI_OPCODE_RFL: - exec_rfl(mach, inst); - break; - case TGSI_OPCODE_SEQ: exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_SFL: - exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_SGT: exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -3777,10 +3695,6 @@ exec_instruction( exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_STR: - exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_TEX: /* simple texture lookup */ /* src[0] = texcoord */ @@ -3834,22 +3748,10 @@ exec_instruction( assert (0); break; - case TGSI_OPCODE_X2D: - exec_x2d(mach, inst); - break; - - case TGSI_OPCODE_ARA: - assert (0); - break; - case TGSI_OPCODE_ARR: exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_BRA: - assert (0); - break; - case TGSI_OPCODE_CAL: /* skip the call if no execution channels are enabled */ if (mach->ExecMask) { @@ -3946,14 +3848,6 @@ exec_instruction( exec_scs(mach, inst); break; - case TGSI_OPCODE_NRM: - exec_nrm3(mach, inst); - break; - - case TGSI_OPCODE_NRM4: - exec_nrm4(mach, inst); - break; - case TGSI_OPCODE_DIV: exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -4417,6 +4311,31 @@ exec_instruction( /* src[2] = sampler unit */ exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); break; + + case TGSI_OPCODE_IBFE: + exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + case TGSI_OPCODE_UBFE: + exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_BFI: + exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_BREV: + exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_POPC: + exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_LSB: + exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_IMSB: + exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + case TGSI_OPCODE_UMSB: + exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); + break; default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 9b1995c..cc5a916 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -193,7 +193,6 @@ struct tgsi_sampler #define TGSI_EXEC_NUM_TEMP_R 4 #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) -#define TGSI_EXEC_NUM_ADDRS 1 /* predicate register */ #define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9) @@ -213,11 +212,11 @@ struct tgsi_sampler * input register files, this is the stride between two 1D * arrays. */ -#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 +#define TGSI_EXEC_MAX_INPUT_ATTRIBS PIPE_MAX_SHADER_INPUTS -/* The maximum number of constant vectors per constant buffer. +/* The maximum number of bytes per constant buffer. */ -#define TGSI_EXEC_MAX_CONST_BUFFER 4096 +#define TGSI_EXEC_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4])) /* The maximum number of vertices per primitive */ #define TGSI_MAX_PRIM_VERTICES 6 @@ -297,6 +296,7 @@ struct tgsi_exec_machine unsigned *Primitives; unsigned NumOutputs; unsigned MaxGeometryShaderOutputs; + unsigned MaxOutputVertices; /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; @@ -426,14 +426,14 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return TGSI_EXEC_MAX_NESTING; case PIPE_SHADER_CAP_MAX_INPUTS: return TGSI_EXEC_MAX_INPUT_ATTRIBS; - case PIPE_SHADER_CAP_MAX_CONSTS: - return TGSI_EXEC_MAX_CONST_BUFFER; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return 32; + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return TGSI_EXEC_MAX_CONST_BUFFER_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return PIPE_MAX_CONSTANT_BUFFERS; case PIPE_SHADER_CAP_MAX_TEMPS: return TGSI_EXEC_NUM_TEMPS; - case PIPE_SHADER_CAP_MAX_ADDRS: - return TGSI_EXEC_NUM_ADDRS; case PIPE_SHADER_CAP_MAX_PREDS: return TGSI_EXEC_NUM_PREDS; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -449,11 +449,19 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return PIPE_MAX_SAMPLERS; + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return PIPE_MAX_SHADER_SAMPLER_VIEWS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; - default: + case PIPE_SHADER_CAP_DOUBLES: return 0; } + /* if we get here, we missed a shader cap above (and should have seen + * a compiler warning.) + */ + return 0; } #if defined __cplusplus diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 565f274..c90d24c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -56,7 +56,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, 0, 0, COMP, "CND", TGSI_OPCODE_CND }, + { 0, 0, 0, 0, 0, 0, NONE, "", 19 }, /* removed */ { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ @@ -71,7 +71,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, { 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */ { 1, 1, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, 0, 0, REPL, "RCC", TGSI_OPCODE_RCC }, + { 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */ { 1, 2, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH }, { 1, 1, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS }, { 1, 1, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX }, @@ -81,14 +81,14 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, COMP, "PK2US", TGSI_OPCODE_PK2US }, { 1, 1, 0, 0, 0, 0, COMP, "PK4B", TGSI_OPCODE_PK4B }, { 1, 1, 0, 0, 0, 0, COMP, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 2, 0, 0, 0, 0, COMP, "RFL", TGSI_OPCODE_RFL }, + { 0, 1, 0, 0, 0, 1, NONE, "", 44 }, /* removed */ { 1, 2, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 2, 0, 0, 0, 0, REPL, "SFL", TGSI_OPCODE_SFL }, + { 0, 1, 0, 0, 0, 1, NONE, "", 46 }, /* removed */ { 1, 2, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT }, { 1, 1, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN }, { 1, 2, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE }, { 1, 2, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE }, - { 1, 2, 0, 0, 0, 0, REPL, "STR", TGSI_OPCODE_STR }, + { 0, 1, 0, 0, 0, 1, NONE, "", 51 }, /* removed */ { 1, 2, 1, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX }, { 1, 4, 1, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD }, { 1, 2, 1, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP }, @@ -96,17 +96,17 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, COMP, "UP2US", TGSI_OPCODE_UP2US }, { 1, 1, 0, 0, 0, 0, COMP, "UP4B", TGSI_OPCODE_UP4B }, { 1, 1, 0, 0, 0, 0, COMP, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 3, 0, 0, 0, 0, COMP, "X2D", TGSI_OPCODE_X2D }, - { 1, 1, 0, 0, 0, 0, COMP, "ARA", TGSI_OPCODE_ARA }, + { 0, 1, 0, 0, 0, 1, NONE, "", 59 }, /* removed */ + { 0, 1, 0, 0, 0, 1, NONE, "", 60 }, /* removed */ { 1, 1, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, 0, 0, NONE, "BRA", TGSI_OPCODE_BRA }, + { 0, 1, 0, 0, 0, 1, NONE, "", 62 }, /* removed */ { 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL }, { 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET }, { 1, 1, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG }, { 1, 3, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP }, { 1, 1, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS }, { 1, 2, 1, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB }, - { 1, 1, 0, 0, 0, 0, COMP, "NRM", TGSI_OPCODE_NRM }, + { 0, 1, 0, 0, 0, 1, NONE, "", 69 }, /* removed */ { 1, 2, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV }, { 1, 2, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 }, { 1, 2, 1, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL }, @@ -116,8 +116,8 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 1, NONE, "", 76 }, /* removed */ { 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, 1, 0, NONE, "", 79 }, /* removed */ - { 0, 0, 0, 0, 1, 0, NONE, "", 80 }, /* removed */ + { 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, + { 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, { 0, 1, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA }, { 1, 1, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL }, @@ -134,8 +134,8 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 1, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF }, { 1, 2, 1, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ }, { 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT }, - { 0, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 1, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 1, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, { 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, { 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB }, { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, @@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, { 1, 2, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, { 1, 2, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, - { 1, 1, 0, 0, 0, 0, REPL, "NRM4", TGSI_OPCODE_NRM4 }, + { 0, 1, 0, 0, 0, 1, NONE, "", 112 }, /* removed */ { 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, { 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, @@ -222,6 +222,19 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, { 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, + { 1, 2, 1, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ }, + { 1, 3, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE }, + { 1, 3, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE }, + { 1, 4, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI }, + { 1, 1, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV }, + { 1, 1, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC }, + { 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB }, + { 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, + { 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, + + { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID }, + { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, + { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index cc5c03b..147d989 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -75,7 +75,6 @@ OP12(SGE) OP13(MAD) OP12(SUB) OP13(LRP) -OP13(CND) OP11(SQRT) OP13(DP2A) OP11(FRC) @@ -87,7 +86,6 @@ OP11(LG2) OP12(POW) OP12(XPD) OP11(ABS) -OP11(RCC) OP12(DPH) OP11(COS) OP11(DDX) @@ -97,14 +95,11 @@ OP11(PK2H) OP11(PK2US) OP11(PK4B) OP11(PK4UB) -OP12(RFL) OP12(SEQ) -OP12(SFL) OP12(SGT) OP11(SIN) OP12(SLE) OP12(SNE) -OP12(STR) OP12_TEX(TEX) OP14_TEX(TXD) OP12_TEX(TXP) @@ -112,17 +107,13 @@ OP11(UP2H) OP11(UP2US) OP11(UP4B) OP11(UP4UB) -OP13(X2D) -OP11(ARA) OP11(ARR) -OP01(BRA) OP00_LBL(CAL) OP00(RET) OP11(SSG) OP13(CMP) OP11(SCS) OP12_TEX(TXB) -OP11(NRM) OP12(DIV) OP12(DP2) OP12_TEX(TXL) @@ -146,14 +137,13 @@ OP13(SAD) OP12_TEX(TXF) OP12_TEX(TXQ) OP00(CONT) -OP00(EMIT) -OP00(ENDPRIM) +OP01(EMIT) +OP01(ENDPRIM) OP00_LBL(BGNLOOP) OP00(BGNSUB) OP00_LBL(ENDLOOP) OP00(ENDSUB) OP00(NOP) -OP11(NRM4) OP01(CALLNZ) OP01(BREAKC) OP01(KILL_IF) @@ -201,6 +191,7 @@ OP13_SAMPLE(GATHER4) OP12(SVIEWINFO) OP13(SAMPLE_POS) OP12(SAMPLE_INFO) +OP11(UARL) OP13(UCMP) diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 2e450a4..bfcca48 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -150,6 +150,9 @@ tgsi_dup_tokens(const struct tgsi_token *tokens); struct tgsi_token * tgsi_alloc_tokens(unsigned num_tokens); +void +tgsi_free_tokens(const struct tgsi_token *tokens); + #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c new file mode 100644 index 0000000..fbfe652 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -0,0 +1,562 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "cso_cache/cso_hash.h" +#include "tgsi_sanity.h" +#include "tgsi_info.h" +#include "tgsi_iterate.h" + + +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE) + + +typedef struct { + uint file : 28; + /* max 2 dimensions */ + uint dimensions : 4; + uint indices[2]; +} scan_register; + +struct sanity_check_ctx +{ + struct tgsi_iterate_context iter; + struct cso_hash *regs_decl; + struct cso_hash *regs_used; + struct cso_hash *regs_ind_used; + + uint num_imms; + uint num_instructions; + uint index_of_END; + + uint errors; + uint warnings; + uint implied_array_size; + + boolean print; +}; + +static INLINE unsigned +scan_register_key(const scan_register *reg) +{ + unsigned key = reg->file; + key |= (reg->indices[0] << 4); + key |= (reg->indices[1] << 18); + + return key; +} + +static void +fill_scan_register1d(scan_register *reg, + uint file, uint index) +{ + reg->file = file; + reg->dimensions = 1; + reg->indices[0] = index; + reg->indices[1] = 0; +} + +static void +fill_scan_register2d(scan_register *reg, + uint file, uint index1, uint index2) +{ + reg->file = file; + reg->dimensions = 2; + reg->indices[0] = index1; + reg->indices[1] = index2; +} + +static void +scan_register_dst(scan_register *reg, + struct tgsi_full_dst_register *dst) +{ + if (dst->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + fill_scan_register2d(reg, + dst->Register.File, + dst->Register.Index, + dst->Dimension.Index); + } else { + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); + } +} + +static void +scan_register_src(scan_register *reg, + struct tgsi_full_src_register *src) +{ + if (src->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + fill_scan_register2d(reg, + src->Register.File, + src->Register.Index, + src->Dimension.Index); + } else { + fill_scan_register1d(reg, + src->Register.File, + src->Register.Index); + } +} + +static scan_register * +create_scan_register_src(struct tgsi_full_src_register *src) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_src(reg, src); + + return reg; +} + +static scan_register * +create_scan_register_dst(struct tgsi_full_dst_register *dst) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_dst(reg, dst); + + return reg; +} + +static void +report_error( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + if (!ctx->print) + return; + + debug_printf( "Error : " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->errors++; +} + +static void +report_warning( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + if (!ctx->print) + return; + + debug_printf( "Warning: " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->warnings++; +} + +static boolean +check_file_name( + struct sanity_check_ctx *ctx, + uint file ) +{ + if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { + report_error( ctx, "(%u): Invalid register file name", file ); + return FALSE; + } + return TRUE; +} + +static boolean +is_register_declared( + struct sanity_check_ctx *ctx, + const scan_register *reg) +{ + void *data = cso_hash_find_data_from_template( + ctx->regs_decl, scan_register_key(reg), + (void*)reg, sizeof(scan_register)); + return data ? TRUE : FALSE; +} + +static boolean +is_any_register_declared( + struct sanity_check_ctx *ctx, + uint file ) +{ + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); + + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (reg->file == file) + return TRUE; + iter = cso_hash_iter_next(iter); + } + + return FALSE; +} + +static boolean +is_register_used( + struct sanity_check_ctx *ctx, + scan_register *reg) +{ + void *data = cso_hash_find_data_from_template( + ctx->regs_used, scan_register_key(reg), + reg, sizeof(scan_register)); + return data ? TRUE : FALSE; +} + + +static boolean +is_ind_register_used( + struct sanity_check_ctx *ctx, + scan_register *reg) +{ + return cso_hash_contains(ctx->regs_ind_used, reg->file); +} + +static const char *file_names[TGSI_FILE_COUNT] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM", + "PRED", + "SV", + "RES" +}; + +static boolean +check_register_usage( + struct sanity_check_ctx *ctx, + scan_register *reg, + const char *name, + boolean indirect_access ) +{ + if (!check_file_name( ctx, reg->file )) { + FREE(reg); + return FALSE; + } + + if (indirect_access) { + /* Note that 'index' is an offset relative to the value of the + * address register. No range checking done here.*/ + reg->indices[0] = 0; + reg->indices[1] = 0; + if (!is_any_register_declared( ctx, reg->file )) + report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name ); + if (!is_ind_register_used(ctx, reg)) + cso_hash_insert(ctx->regs_ind_used, reg->file, reg); + else + FREE(reg); + } + else { + if (!is_register_declared( ctx, reg )) { + if (reg->dimensions == 2) { + report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], reg->indices[1], name ); + } + else { + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], name ); + } + } + if (!is_register_used( ctx, reg )) + cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); + else + FREE(reg); + } + return TRUE; +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + const struct tgsi_opcode_info *info; + uint i; + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + if (ctx->index_of_END != ~0) { + report_error( ctx, "Too many END instructions" ); + } + ctx->index_of_END = ctx->num_instructions; + } + + info = tgsi_get_opcode_info( inst->Instruction.Opcode ); + if (info == NULL) { + report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode ); + return TRUE; + } + + if (info->num_dst != inst->Instruction.NumDstRegs) { + report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst ); + } + if (info->num_src != inst->Instruction.NumSrcRegs) { + report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); + } + + /* Check destination and source registers' validity. + * Mark the registers as used. + */ + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + scan_register *reg = create_scan_register_dst(&inst->Dst[i]); + check_register_usage( + ctx, + reg, + "destination", + FALSE ); + if (!inst->Dst[i].Register.WriteMask) { + report_error(ctx, "Destination register has empty writemask"); + } + } + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + scan_register *reg = create_scan_register_src(&inst->Src[i]); + check_register_usage( + ctx, + reg, + "source", + (boolean)inst->Src[i].Register.Indirect ); + if (inst->Src[i].Register.Indirect) { + scan_register *ind_reg = MALLOC(sizeof(scan_register)); + + fill_scan_register1d(ind_reg, + inst->Src[i].Indirect.File, + inst->Src[i].Indirect.Index); + check_register_usage( + ctx, + ind_reg, + "indirect", + FALSE ); + } + } + + ctx->num_instructions++; + + return TRUE; +} + +static void +check_and_declare(struct sanity_check_ctx *ctx, + scan_register *reg) +{ + if (is_register_declared( ctx, reg)) + report_error( ctx, "%s[%u]: The same register declared more than once", + file_names[reg->file], reg->indices[0] ); + cso_hash_insert(ctx->regs_decl, + scan_register_key(reg), + reg); +} + + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + uint i; + + /* No declarations allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but declaration found" ); + + /* Check registers' validity. + * Mark the registers as declared. + */ + file = decl->Declaration.File; + if (!check_file_name( ctx, file )) + return TRUE; + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + /* declared TGSI_FILE_INPUT's for geometry processor + * have an implied second dimension */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + uint vert; + for (vert = 0; vert < ctx->implied_array_size; ++vert) { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register2d(reg, file, i, vert); + check_and_declare(ctx, reg); + } + } else { + scan_register *reg = MALLOC(sizeof(scan_register)); + if (decl->Declaration.Dimension) { + fill_scan_register2d(reg, file, i, decl->Dim.Index2D); + } else { + fill_scan_register1d(reg, file, i); + } + check_and_declare(ctx, reg); + } + } + + return TRUE; +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + scan_register *reg; + + /* No immediates allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but immediate found" ); + + /* Mark the register as declared. + */ + reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms); + cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg); + ctx->num_imms++; + + /* Check data type validity. + */ + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { + report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); + return TRUE; + } + + return TRUE; +} + + +static boolean +iter_property( + struct tgsi_iterate_context *iter, + struct tgsi_full_property *prop ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + + if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && + prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { + ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); + } + return TRUE; +} + +static boolean +epilog( + struct tgsi_iterate_context *iter ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + + /* There must be an END instruction somewhere. + */ + if (ctx->index_of_END == ~0) { + report_error( ctx, "Missing END instruction" ); + } + + /* Check if all declared registers were used. + */ + { + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); + + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { + report_warning( ctx, "%s[%u]: Register never used", + file_names[reg->file], reg->indices[0] ); + } + iter = cso_hash_iter_next(iter); + } + } + + /* Print totals, if any. + */ + if (ctx->errors || ctx->warnings) + debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings ); + + return TRUE; +} + +static void +regs_hash_destroy(struct cso_hash *hash) +{ + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + iter = cso_hash_erase(hash, iter); + assert(reg->file < TGSI_FILE_COUNT); + FREE(reg); + } + cso_hash_delete(hash); +} + +boolean +tgsi_sanity_check( + const struct tgsi_token *tokens ) +{ + struct sanity_check_ctx ctx; + + ctx.iter.prolog = NULL; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.iterate_property = iter_property; + ctx.iter.epilog = epilog; + + ctx.regs_decl = cso_hash_create(); + ctx.regs_used = cso_hash_create(); + ctx.regs_ind_used = cso_hash_create(); + + ctx.num_imms = 0; + ctx.num_instructions = 0; + ctx.index_of_END = ~0; + + ctx.errors = 0; + ctx.warnings = 0; + ctx.implied_array_size = 0; + ctx.print = debug_get_option_print_sanity(); + + if (!tgsi_iterate_shader( tokens, &ctx.iter )) + return FALSE; + + regs_hash_destroy(ctx.regs_decl); + regs_hash_destroy(ctx.regs_used); + regs_hash_destroy(ctx.regs_ind_used); + return ctx.errors == 0; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 00fdcfb..e6011d2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -130,6 +130,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* check for indirect register reads */ if (src->Register.Indirect) { info->indirect_files |= (1 << src->Register.File); + info->indirect_files_read |= (1 << src->Register.File); } /* MSAA samplers */ @@ -150,6 +151,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; if (dst->Register.Indirect) { info->indirect_files |= (1 << dst->Register.File); + info->indirect_files_written |= (1 << dst->Register.File); } } @@ -163,6 +165,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, = &parse.FullToken.FullDeclaration; const uint file = fulldecl->Declaration.File; uint reg; + if (fulldecl->Declaration.Array) + info->array_max[file] = MAX2(info->array_max[file], fulldecl->Array.ArrayID); for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) { @@ -187,15 +191,18 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_name[reg] = (ubyte) semName; info->input_semantic_index[reg] = (ubyte) semIndex; info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; - info->input_centroid[reg] = (ubyte)fulldecl->Interp.Centroid; + info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; info->num_inputs++; - if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID) + info->uses_centroid = TRUE; + + if (semName == TGSI_SEMANTIC_PRIMID) + info->uses_primid = TRUE; + else if (procType == TGSI_PROCESSOR_FRAGMENT) { if (semName == TGSI_SEMANTIC_POSITION) info->reads_position = TRUE; - else if (semName == TGSI_SEMANTIC_PRIMID) - info->uses_primid = TRUE; else if (semName == TGSI_SEMANTIC_FACE) info->uses_frontface = TRUE; } @@ -213,6 +220,12 @@ tgsi_scan_shader(const struct tgsi_token *tokens, else if (semName == TGSI_SEMANTIC_VERTEXID) { info->uses_vertexid = TRUE; } + else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) { + info->uses_vertexid_nobase = TRUE; + } + else if (semName == TGSI_SEMANTIC_BASEVERTEX) { + info->uses_basevertex = TRUE; + } else if (semName == TGSI_SEMANTIC_PRIMID) { info->uses_primid = TRUE; } @@ -227,10 +240,26 @@ tgsi_scan_shader(const struct tgsi_token *tokens, if (semName == TGSI_SEMANTIC_CLIPDIST) { info->num_written_clipdistance += util_bitcount(fulldecl->Declaration.UsageMask); + info->clipdist_writemask |= + fulldecl->Declaration.UsageMask << (semIndex*4); } else if (semName == TGSI_SEMANTIC_CULLDIST) { info->num_written_culldistance += util_bitcount(fulldecl->Declaration.UsageMask); + info->culldist_writemask |= + fulldecl->Declaration.UsageMask << (semIndex*4); + } + else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { + info->writes_viewport_index = TRUE; + } + else if (semName == TGSI_SEMANTIC_LAYER) { + info->writes_layer = TRUE; + } + else if (semName == TGSI_SEMANTIC_PSIZE) { + info->writes_psize = TRUE; + } + else if (semName == TGSI_SEMANTIC_CLIPVERTEX) { + info->writes_clipvertex = TRUE; } } @@ -248,15 +277,6 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->writes_edgeflag = TRUE; } } - - if (procType == TGSI_PROCESSOR_GEOMETRY) { - if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { - info->writes_viewport_index = TRUE; - } - else if (semName == TGSI_SEMANTIC_LAYER) { - info->writes_layer = TRUE; - } - } } } } @@ -277,13 +297,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { const struct tgsi_full_property *fullprop = &parse.FullToken.FullProperty; + unsigned name = fullprop->Property.PropertyName; - info->properties[info->num_properties].name = - fullprop->Property.PropertyName; - memcpy(info->properties[info->num_properties].data, - fullprop->u, 8 * sizeof(unsigned));; - - ++info->num_properties; + assert(name < Elements(info->properties)); + info->properties[name] = fullprop->u[0].Data; } break; @@ -295,36 +312,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->uses_kill = (info->opcode_count[TGSI_OPCODE_KILL_IF] || info->opcode_count[TGSI_OPCODE_KILL]); - /* extract simple properties */ - for (i = 0; i < info->num_properties; ++i) { - switch (info->properties[i].name) { - case TGSI_PROPERTY_FS_COORD_ORIGIN: - info->origin_lower_left = info->properties[i].data[0]; - break; - case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: - info->pixel_center_integer = info->properties[i].data[0]; - break; - case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: - info->color0_writes_all_cbufs = info->properties[i].data[0]; - break; - case TGSI_PROPERTY_GS_INPUT_PRIM: - /* The dimensions of the IN decleration in geometry shader have - * to be deduced from the type of the input primitive. - */ - if (procType == TGSI_PROCESSOR_GEOMETRY) { - unsigned input_primitive = info->properties[i].data[0]; - int num_verts = u_vertices_per_prim(input_primitive); - int j; - info->file_count[TGSI_FILE_INPUT] = num_verts; - info->file_max[TGSI_FILE_INPUT] = - MAX2(info->file_max[TGSI_FILE_INPUT], num_verts - 1); - for (j = 0; j < num_verts; ++j) { - info->file_mask[TGSI_FILE_INPUT] |= (1 << j); - } - } - break; - default: - ; + /* The dimensions of the IN decleration in geometry shader have + * to be deduced from the type of the input primitive. + */ + if (procType == TGSI_PROCESSOR_GEOMETRY) { + unsigned input_primitive = + info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]; + int num_verts = u_vertices_per_prim(input_primitive); + int j; + info->file_count[TGSI_FILE_INPUT] = num_verts; + info->file_max[TGSI_FILE_INPUT] = + MAX2(info->file_max[TGSI_FILE_INPUT], num_verts - 1); + for (j = 0; j < num_verts; ++j) { + info->file_mask[TGSI_FILE_INPUT] |= (1 << j); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 0be2feb..5dc9267 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,7 +45,7 @@ struct tgsi_shader_info ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; - ubyte input_centroid[PIPE_MAX_SHADER_INPUTS]; + ubyte input_interpolate_loc[PIPE_MAX_SHADER_INPUTS]; ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ @@ -61,6 +61,8 @@ struct tgsi_shader_info int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ int const_file_max[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned array_max[TGSI_FILE_COUNT]; /**< highest index array per register file */ + uint immediate_count; /**< number of immediates declared */ uint num_instructions; @@ -72,17 +74,21 @@ struct tgsi_shader_info boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KILL or KILL_IF instruction used? */ + boolean uses_centroid; boolean uses_instanceid; boolean uses_vertexid; + boolean uses_vertexid_nobase; + boolean uses_basevertex; boolean uses_primid; boolean uses_frontface; - boolean origin_lower_left; - boolean pixel_center_integer; - boolean color0_writes_all_cbufs; + boolean writes_psize; + boolean writes_clipvertex; boolean writes_viewport_index; boolean writes_layer; boolean is_msaa_sampler[PIPE_MAX_SAMPLERS]; + unsigned clipdist_writemask; + unsigned culldist_writemask; unsigned num_written_culldistance; unsigned num_written_clipdistance; /** @@ -90,12 +96,14 @@ struct tgsi_shader_info * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. */ unsigned indirect_files; + /** + * Bitmask indicating which register files are read / written with + * indirect addressing. The bits are (1 << TGSI_FILE_x). + */ + unsigned indirect_files_read; + unsigned indirect_files_written; - struct { - unsigned name; - unsigned data[8]; - } properties[TGSI_PROPERTY_COUNT]; - uint num_properties; + unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */ }; extern void diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index b0ba3ef..bd97544 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -81,7 +81,13 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = "PCOORD", "VIEWPORT_INDEX", "LAYER", - "CULLDIST" + "CULLDIST", + "SAMPLEID", + "SAMPLEPOS", + "SAMPLEMASK", + "INVOCATIONID", + "VERTEXID_NOBASE", + "BASEVERTEX", }; const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] = @@ -116,10 +122,12 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "FS_COORD_PIXEL_CENTER", "FS_COLOR0_WRITES_ALL_CBUFS", "FS_DEPTH_LAYOUT", - "VS_PROHIBIT_UCPS" + "VS_PROHIBIT_UCPS", + "GS_INVOCATIONS", + "VS_WINDOW_SPACE_POSITION" }; -const char *tgsi_type_names[5] = +const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = { "UNORM", "SNORM", @@ -136,6 +144,13 @@ const char *tgsi_interpolate_names[TGSI_INTERPOLATE_COUNT] = "COLOR" }; +const char *tgsi_interpolate_locations[TGSI_INTERPOLATE_LOC_COUNT] = +{ + "CENTER", + "CENTROID", + "SAMPLE", +}; + const char *tgsi_primitive_names[PIPE_PRIM_MAX] = { "POINTS", @@ -182,8 +197,9 @@ tgsi_strings_check(void) STATIC_ASSERT(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT); STATIC_ASSERT(Elements(tgsi_primitive_names) == PIPE_PRIM_MAX); STATIC_ASSERT(Elements(tgsi_interpolate_names) == TGSI_INTERPOLATE_COUNT); + STATIC_ASSERT(Elements(tgsi_return_type_names) == TGSI_RETURN_TYPE_COUNT); (void) tgsi_processor_type_names; - (void) tgsi_type_names; + (void) tgsi_return_type_names; (void) tgsi_immediate_type_names; (void) tgsi_fs_coord_origin_names; (void) tgsi_fs_coord_pixel_center_names; diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h b/src/gallium/auxiliary/tgsi/tgsi_strings.h index 3477d50..c842746 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.h +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h @@ -46,10 +46,12 @@ extern const char *tgsi_texture_names[TGSI_TEXTURE_COUNT]; extern const char *tgsi_property_names[TGSI_PROPERTY_COUNT]; -extern const char *tgsi_type_names[5]; +extern const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT]; extern const char *tgsi_interpolate_names[TGSI_INTERPOLATE_COUNT]; +extern const char *tgsi_interpolate_locations[TGSI_INTERPOLATE_LOC_COUNT]; + extern const char *tgsi_primitive_names[PIPE_PRIM_MAX]; extern const char *tgsi_fs_coord_origin_names[2]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 2dbbaac..f965b01 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -195,15 +195,8 @@ static boolean parse_float( const char **pcur, float *val ) boolean integral_part = FALSE; boolean fractional_part = FALSE; - if (*cur == '0' && *(cur + 1) == 'x') { - union fi fi; - fi.ui = strtoul(cur, NULL, 16); - *val = fi.f; - cur += 10; - goto out; - } - *val = (float) atof( cur ); + if (*cur == '-' || *cur == '+') cur++; if (is_digit( cur )) { @@ -235,8 +228,6 @@ static boolean parse_float( const char **pcur, float *val ) else return FALSE; } - -out: *pcur = cur; return TRUE; } @@ -744,8 +735,9 @@ parse_dst_operand( static boolean parse_optional_swizzle( struct translate_ctx *ctx, - uint swizzle[4], - boolean *parsed_swizzle, int max ) + uint *swizzle, + boolean *parsed_swizzle, + int components) { const char *cur = ctx->cur; @@ -757,7 +749,7 @@ parse_optional_swizzle( cur++; eat_opt_white( &cur ); - for (i = 0; i < max; i++) { + for (i = 0; i < components; i++) { if (uprcase( *cur ) == 'X') swizzle[i] = TGSI_SWIZZLE_X; else if (uprcase( *cur ) == 'Y') @@ -812,6 +804,13 @@ parse_src_operand( src->Dimension.Indirect = 0; src->Dimension.Dimension = 0; src->Dimension.Index = bracket[0].index; + if (bracket[0].ind_file != TGSI_FILE_NULL) { + src->Dimension.Indirect = 1; + src->DimIndirect.File = bracket[0].ind_file; + src->DimIndirect.Index = bracket[0].ind_index; + src->DimIndirect.Swizzle = bracket[0].ind_comp; + src->DimIndirect.ArrayID = bracket[0].ind_array; + } bracket[0] = bracket[1]; } src->Register.Index = bracket[0].index; @@ -848,31 +847,28 @@ parse_src_operand( } static boolean -parse_tex_offset_operand( +parse_texoffset_operand( struct translate_ctx *ctx, - struct tgsi_texture_offset *tex_offset ) + struct tgsi_texture_offset *src ) { uint file; - uint swizzle[4]; + uint swizzle[3]; boolean parsed_swizzle; - struct parsed_bracket bracket[2]; - int parsed_opt_brackets; + struct parsed_bracket bracket; - if (!parse_register_src(ctx, &file, &bracket[0])) - return FALSE; - if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) + if (!parse_register_src(ctx, &file, &bracket)) return FALSE; - tex_offset->File = file; - tex_offset->Index = bracket[0].index; + src->File = file; + src->Index = bracket.index; /* Parse optional swizzle. */ if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, 3 )) { if (parsed_swizzle) { - tex_offset->SwizzleX = swizzle[0]; - tex_offset->SwizzleY = swizzle[1]; - tex_offset->SwizzleZ = swizzle[2]; + src->SwizzleX = swizzle[0]; + src->SwizzleY = swizzle[1]; + src->SwizzleZ = swizzle[2]; } } @@ -997,8 +993,7 @@ parse_instruction( /* * These are not considered tex opcodes here (no additional * target argument) however we're required to set the Texture - * bit so we can set the number of tex offsets (offsets aren't - * actually handled here yet in any case). + * bit so we can set the number of tex offsets. */ inst.Instruction.Texture = 1; inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN; @@ -1042,24 +1037,18 @@ parse_instruction( } } - if (inst.Instruction.Texture) { - uint j = 0; - - cur = ctx->cur; - eat_opt_white( &cur ); - for (j = 0; j < 4; j++) { - if (*cur == ',') { - ctx->cur = cur; - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_tex_offset_operand(ctx, &inst.TexOffsets[j])) - return FALSE; - cur = ctx->cur; - } else - break; - } - inst.Texture.NumOffsets = j; + cur = ctx->cur; + eat_opt_white( &cur ); + for (i = 0; inst.Instruction.Texture && *cur == ','; i++) { + cur++; + eat_opt_white( &cur ); + ctx->cur = cur; + if (!parse_texoffset_operand( ctx, &inst.TexOffsets[i] )) + return FALSE; + cur = ctx->cur; + eat_opt_white( &cur ); } + inst.Texture.NumOffsets = i; cur = ctx->cur; eat_opt_white( &cur ); @@ -1269,8 +1258,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) ++cur; eat_opt_white( &cur ); for (j = 0; j < 4; ++j) { - for (i = 0; i < PIPE_TYPE_COUNT; ++i) { - if (str_match_nocase_whole(&cur, tgsi_type_names[i])) { + for (i = 0; i < TGSI_RETURN_TYPE_COUNT; ++i) { + if (str_match_nocase_whole(&cur, tgsi_return_type_names[i])) { switch (j) { case 0: decl.SamplerView.ReturnTypeX = i; @@ -1290,7 +1279,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) break; } } - if (i == PIPE_TYPE_COUNT) { + if (i == TGSI_RETURN_TYPE_COUNT) { if (j == 0 || j > 2) { report_error(ctx, "Expected type name"); return FALSE; @@ -1388,11 +1377,17 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur = ctx->cur; eat_opt_white( &cur ); if (*cur == ',' && !is_vs_input) { + uint i; + cur++; eat_opt_white( &cur ); - if (str_match_nocase_whole( &cur, "CENTROID" )) { - decl.Interp.Centroid = 1; - ctx->cur = cur; + for (i = 0; i < TGSI_INTERPOLATE_LOC_COUNT; i++) { + if (str_match_nocase_whole( &cur, tgsi_interpolate_locations[i] )) { + decl.Interp.Location = i; + + ctx->cur = cur; + break; + } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 38cce58..f524dfb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -78,7 +78,7 @@ struct ureg_tokens { #define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 4096 -#define UREG_MAX_ADDR 2 +#define UREG_MAX_ADDR 3 #define UREG_MAX_PRED 1 #define UREG_MAX_ARRAY_TEMPS 256 @@ -103,7 +103,7 @@ struct ureg_program unsigned semantic_index; unsigned interp; unsigned char cylindrical_wrap; - unsigned char centroid; + unsigned interp_location; } fs_input[UREG_MAX_INPUT]; unsigned nr_fs_inputs; @@ -165,14 +165,7 @@ struct ureg_program struct const_decl const_decls; struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; - unsigned property_gs_input_prim; - unsigned property_gs_output_prim; - unsigned property_gs_max_vertices; - unsigned property_gs_invocations; - unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ - unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ - unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */ - unsigned char property_fs_depth_layout; /* TGSI_FS_DEPTH_LAYOUT */ + unsigned properties[TGSI_PROPERTY_COUNT]; unsigned nr_addrs; unsigned nr_preds; @@ -277,58 +270,10 @@ ureg_dst_register( unsigned file, void -ureg_property_gs_input_prim(struct ureg_program *ureg, - unsigned input_prim) +ureg_property(struct ureg_program *ureg, unsigned name, unsigned value) { - ureg->property_gs_input_prim = input_prim; -} - -void -ureg_property_gs_output_prim(struct ureg_program *ureg, - unsigned output_prim) -{ - ureg->property_gs_output_prim = output_prim; -} - -void -ureg_property_gs_max_vertices(struct ureg_program *ureg, - unsigned max_vertices) -{ - ureg->property_gs_max_vertices = max_vertices; -} -void -ureg_property_gs_invocations(struct ureg_program *ureg, - unsigned invocations) -{ - ureg->property_gs_invocations = invocations; -} - -void -ureg_property_fs_coord_origin(struct ureg_program *ureg, - unsigned fs_coord_origin) -{ - ureg->property_fs_coord_origin = fs_coord_origin; -} - -void -ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, - unsigned fs_coord_pixel_center) -{ - ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; -} - -void -ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, - unsigned fs_color0_writes_all_cbufs) -{ - ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs; -} - -void -ureg_property_fs_depth_layout(struct ureg_program *ureg, - unsigned fs_depth_layout) -{ - ureg->property_fs_depth_layout = fs_depth_layout; + assert(name < Elements(ureg->properties)); + ureg->properties[name] = value; } struct ureg_src @@ -337,7 +282,7 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, unsigned semantic_index, unsigned interp_mode, unsigned cylindrical_wrap, - unsigned centroid) + unsigned interp_location) { unsigned i; @@ -353,7 +298,7 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, ureg->fs_input[i].semantic_index = semantic_index; ureg->fs_input[i].interp = interp_mode; ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; - ureg->fs_input[i].centroid = centroid; + ureg->fs_input[i].interp_location = interp_location; ureg->nr_fs_inputs++; } else { set_bad(ureg); @@ -1280,7 +1225,7 @@ emit_decl_fs(struct ureg_program *ureg, unsigned semantic_index, unsigned interpolate, unsigned cylindrical_wrap, - unsigned centroid) + unsigned interpolate_location) { union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4); @@ -1299,7 +1244,7 @@ emit_decl_fs(struct ureg_program *ureg, out[2].value = 0; out[2].decl_interp.Interpolate = interpolate; out[2].decl_interp.CylindricalWrap = cylindrical_wrap; - out[2].decl_interp.Centroid = centroid; + out[2].decl_interp.Location = interpolate_location; out[3].value = 0; out[3].decl_semantic.Name = semantic_name; @@ -1444,61 +1389,9 @@ static void emit_decls( struct ureg_program *ureg ) { unsigned i; - if (ureg->property_gs_input_prim != ~0) { - assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); - - emit_property(ureg, - TGSI_PROPERTY_GS_INPUT_PRIM, - ureg->property_gs_input_prim); - } - - if (ureg->property_gs_output_prim != ~0) { - assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); - - emit_property(ureg, - TGSI_PROPERTY_GS_OUTPUT_PRIM, - ureg->property_gs_output_prim); - } - - if (ureg->property_gs_max_vertices != ~0) { - assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); - - emit_property(ureg, - TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, - ureg->property_gs_max_vertices); - } - - if (ureg->property_fs_coord_origin) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - - emit_property(ureg, - TGSI_PROPERTY_FS_COORD_ORIGIN, - ureg->property_fs_coord_origin); - } - - if (ureg->property_fs_coord_pixel_center) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - - emit_property(ureg, - TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, - ureg->property_fs_coord_pixel_center); - } - - if (ureg->property_fs_color0_writes_all_cbufs) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - - emit_property(ureg, - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, - ureg->property_fs_color0_writes_all_cbufs); - } - - if (ureg->property_fs_depth_layout) { - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - - emit_property(ureg, - TGSI_PROPERTY_FS_DEPTH_LAYOUT, - ureg->property_fs_depth_layout); - } + for (i = 0; i < Elements(ureg->properties); i++) + if (ureg->properties[i] != ~0) + emit_property(ureg, i, ureg->properties[i]); if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { @@ -1515,7 +1408,7 @@ static void emit_decls( struct ureg_program *ureg ) ureg->fs_input[i].semantic_index, ureg->fs_input[i].interp, ureg->fs_input[i].cylindrical_wrap, - ureg->fs_input[i].centroid); + ureg->fs_input[i].interp_location); } } else { for (i = 0; i < ureg->nr_gs_inputs; i++) { @@ -1749,14 +1642,15 @@ void ureg_free_tokens( const struct tgsi_token *tokens ) struct ureg_program *ureg_create( unsigned processor ) { + int i; struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); if (ureg == NULL) goto no_ureg; ureg->processor = processor; - ureg->property_gs_input_prim = ~0; - ureg->property_gs_output_prim = ~0; - ureg->property_gs_max_vertices = ~0; + + for (i = 0; i < Elements(ureg->properties); i++) + ureg->properties[i] = ~0; ureg->free_temps = util_bitmask_create(); if (ureg->free_temps == NULL) @@ -1783,7 +1677,7 @@ no_ureg: } -const unsigned +unsigned ureg_get_nr_outputs( const struct ureg_program *ureg ) { if (!ureg) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index a0a50b7..f254b1e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -115,7 +115,7 @@ ureg_get_tokens( struct ureg_program *ureg, /* * Returns the number of currently declared outputs. */ -const unsigned +unsigned ureg_get_nr_outputs( const struct ureg_program *ureg ); @@ -153,36 +153,7 @@ ureg_create_shader_and_destroy( struct ureg_program *p, */ void -ureg_property_gs_input_prim(struct ureg_program *ureg, - unsigned input_prim); - -void -ureg_property_gs_output_prim(struct ureg_program *ureg, - unsigned output_prim); - -void -ureg_property_gs_max_vertices(struct ureg_program *ureg, - unsigned max_vertices); - -void -ureg_property_gs_invocations(struct ureg_program *ureg, - unsigned invocations); - -void -ureg_property_fs_coord_origin(struct ureg_program *ureg, - unsigned fs_coord_origin); - -void -ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, - unsigned fs_coord_pixel_center); - -void -ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, - unsigned fs_color0_writes_all_cbufs); - -void -ureg_property_fs_depth_layout(struct ureg_program *ureg, - unsigned fs_depth_layout); +ureg_property(struct ureg_program *ureg, unsigned name, unsigned value); /*********************************************************************** @@ -195,7 +166,7 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *, unsigned semantic_index, unsigned interp_mode, unsigned cylindrical_wrap, - unsigned centroid); + unsigned interp_location); static INLINE struct ureg_src ureg_DECL_fs_input_cyl(struct ureg_program *ureg, diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index e48159c..d572ff0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -193,7 +193,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_MAD: case TGSI_OPCODE_SUB: case TGSI_OPCODE_LRP: - case TGSI_OPCODE_CND: case TGSI_OPCODE_FRC: case TGSI_OPCODE_CEIL: case TGSI_OPCODE_CLAMP: @@ -245,13 +244,14 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_USNE: case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_UMUL_HI: + case TGSI_OPCODE_DDX_FINE: + case TGSI_OPCODE_DDY_FINE: /* Channel-wise operations */ read_mask = write_mask; break; case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCC: read_mask = TGSI_WRITEMASK_X; break; diff --git a/src/gallium/auxiliary/util/rgtc.c b/src/gallium/auxiliary/util/rgtc.c new file mode 100644 index 0000000..6886ac0 --- /dev/null +++ b/src/gallium/auxiliary/util/rgtc.c @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2011 Red Hat Inc. + * + * block compression parts are: + * Copyright (C) 2004 Roland Scheidegger All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: + * Dave Airlie + */ + +#include +#include "macros.h" + +#include "rgtc.h" + +#define RGTC_DEBUG 0 + +#define TAG(x) util_format_unsigned_##x + +#define TYPE unsigned char +#define T_MIN 0 +#define T_MAX 0xff + +#include "texcompress_rgtc_tmp.h" + +#undef TAG +#undef TYPE +#undef T_MIN +#undef T_MAX + +#define TAG(x) util_format_signed_##x +#define TYPE signed char +#define T_MIN (signed char)-128 +#define T_MAX (signed char)127 + +#include "texcompress_rgtc_tmp.h" + +#undef TAG +#undef TYPE +#undef T_MIN +#undef T_MAX + diff --git a/src/gallium/auxiliary/util/rgtc.h b/src/gallium/auxiliary/util/rgtc.h new file mode 100644 index 0000000..6bba9f2 --- /dev/null +++ b/src/gallium/auxiliary/util/rgtc.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2014 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _RGTC_H +#define _RGTC_H + +void util_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const unsigned char *pixdata, + unsigned i, unsigned j, unsigned char *value, unsigned comps); + +void util_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const signed char *pixdata, + unsigned i, unsigned j, signed char *value, unsigned comps); + +void util_format_unsigned_encode_rgtc_ubyte(unsigned char *blkaddr, unsigned char srccolors[4][4], + int numxpixels, int numypixels); + +void util_format_signed_encode_rgtc_ubyte(signed char *blkaddr, signed char srccolors[4][4], + int numxpixels, int numypixels); +#endif /* _RGTC_H */ diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c new file mode 100644 index 0000000..d2d1313 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -0,0 +1,458 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * CPU feature detection. + * + * @author Dennis Smit + * @author Based on the work of Eric Anholt + */ + +#include "pipe/p_config.h" + +#include "u_debug.h" +#include "u_cpu_detect.h" + +#if defined(PIPE_ARCH_PPC) +#if defined(PIPE_OS_APPLE) +#include +#else +#include +#include +#endif +#endif + +#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) +#include +#include +#include +#endif + +#if defined(PIPE_OS_FREEBSD) +#include +#include +#endif + +#if defined(PIPE_OS_LINUX) +#include +#endif + +#ifdef PIPE_OS_UNIX +#include +#endif + +#if defined(PIPE_OS_WINDOWS) +#include +#if defined(PIPE_CC_MSVC) +#include +#endif +#endif + + +#ifdef DEBUG +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE) +#endif + + +struct util_cpu_caps util_cpu_caps; + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +static int has_cpuid(void); +#endif + + +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) +static jmp_buf __lv_powerpc_jmpbuf; +static volatile sig_atomic_t __lv_powerpc_canjump = 0; + +static void +sigill_handler(int sig) +{ + if (!__lv_powerpc_canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + __lv_powerpc_canjump = 0; + longjmp(__lv_powerpc_jmpbuf, 1); +} +#endif + +#if defined(PIPE_ARCH_PPC) +static void +check_os_altivec_support(void) +{ +#if defined(PIPE_OS_APPLE) + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + int len = sizeof (has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) { + if (has_vu != 0) { + util_cpu_caps.has_altivec = 1; + } + } +#else /* !PIPE_OS_APPLE */ + /* not on Apple/Darwin, do it the brute-force way */ + /* this is borrowed from the libmpeg2 library */ + signal(SIGILL, sigill_handler); + if (setjmp(__lv_powerpc_jmpbuf)) { + signal(SIGILL, SIG_DFL); + } else { + __lv_powerpc_canjump = 1; + + __asm __volatile + ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal(SIGILL, SIG_DFL); + util_cpu_caps.has_altivec = 1; + } +#endif /* !PIPE_OS_APPLE */ +} +#endif /* PIPE_ARCH_PPC */ + + +#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64) +static int has_cpuid(void) +{ +#if defined(PIPE_ARCH_X86) +#if defined(PIPE_OS_GCC) + int a, c; + + __asm __volatile + ("pushf\n" + "popl %0\n" + "movl %0, %1\n" + "xorl $0x200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "popl %0\n" + : "=a" (a), "=c" (c) + : + : "cc"); + + return a != c; +#else + /* FIXME */ + return 1; +#endif +#elif defined(PIPE_ARCH_X86_64) + return 1; +#else + return 0; +#endif +} + + +/** + * @sa cpuid.h included in gcc-4.3 onwards. + * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + */ +static INLINE void +cpuid(uint32_t ax, uint32_t *p) +{ +#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86) + __asm __volatile ( + "xchgl %%ebx, %1\n\t" + "cpuid\n\t" + "xchgl %%ebx, %1" + : "=a" (p[0]), + "=S" (p[1]), + "=c" (p[2]), + "=d" (p[3]) + : "0" (ax) + ); +#elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86_64) + __asm __volatile ( + "cpuid\n\t" + : "=a" (p[0]), + "=b" (p[1]), + "=c" (p[2]), + "=d" (p[3]) + : "0" (ax) + ); +#elif defined(PIPE_CC_MSVC) + __cpuid(p, ax); +#else + p[0] = 0; + p[1] = 0; + p[2] = 0; + p[3] = 0; +#endif +} + +/** + * @sa cpuid.h included in gcc-4.4 onwards. + * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx + */ +static INLINE void +cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p) +{ +#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86) + __asm __volatile ( + "xchgl %%ebx, %1\n\t" + "cpuid\n\t" + "xchgl %%ebx, %1" + : "=a" (p[0]), + "=S" (p[1]), + "=c" (p[2]), + "=d" (p[3]) + : "0" (ax), "2" (cx) + ); +#elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86_64) + __asm __volatile ( + "cpuid\n\t" + : "=a" (p[0]), + "=b" (p[1]), + "=c" (p[2]), + "=d" (p[3]) + : "0" (ax), "2" (cx) + ); +#elif defined(PIPE_CC_MSVC) + __cpuidex(p, ax, cx); +#else + p[0] = 0; + p[1] = 0; + p[2] = 0; + p[3] = 0; +#endif +} + + +static INLINE uint64_t xgetbv(void) +{ +#if defined(PIPE_CC_GCC) + uint32_t eax, edx; + + __asm __volatile ( + ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4 + : "=a"(eax), + "=d"(edx) + : "c"(0) + ); + + return ((uint64_t)edx << 32) | eax; +#elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + return _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#else + return 0; +#endif +} + + +#if defined(PIPE_ARCH_X86) +static INLINE boolean sse2_has_daz(void) +{ + struct { + uint32_t pad1[7]; + uint32_t mxcsr_mask; + uint32_t pad2[128-8]; + } PIPE_ALIGN_VAR(16) fxarea; + + fxarea.mxcsr_mask = 0; +#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) + __asm __volatile ("fxsave %0" : "+m" (fxarea)); +#elif (defined(PIPE_CC_MSVC) && _MSC_VER >= 1700) || defined(PIPE_CC_ICL) + /* 1700 = Visual Studio 2012 */ + _fxsave(&fxarea); +#else + fxarea.mxcsr_mask = 0; +#endif + return !!(fxarea.mxcsr_mask & (1 << 6)); +} +#endif + +#endif /* X86 or X86_64 */ + +void +util_cpu_detect(void) +{ + static boolean util_cpu_detect_initialized = FALSE; + + if(util_cpu_detect_initialized) + return; + + memset(&util_cpu_caps, 0, sizeof util_cpu_caps); + + /* Count the number of CPUs in system */ +#if defined(PIPE_OS_WINDOWS) + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors; + } +#elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN) + util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (util_cpu_caps.nr_cpus == -1) + util_cpu_caps.nr_cpus = 1; +#elif defined(PIPE_OS_BSD) + { + int mib[2], ncpu; + int len; + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + + len = sizeof (ncpu); + sysctl(mib, 2, &ncpu, &len, NULL, 0); + util_cpu_caps.nr_cpus = ncpu; + } +#else + util_cpu_caps.nr_cpus = 1; +#endif + + /* Make the fallback cacheline size nonzero so that it can be + * safely passed to align(). + */ + util_cpu_caps.cacheline = sizeof(void *); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if (has_cpuid()) { + uint32_t regs[4]; + uint32_t regs2[4]; + + util_cpu_caps.cacheline = 32; + + /* Get max cpuid level */ + cpuid(0x00000000, regs); + + if (regs[0] >= 0x00000001) { + unsigned int cacheline; + + cpuid (0x00000001, regs2); + + util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf; + if (util_cpu_caps.x86_cpu_type == 0xf) + util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ + + /* general feature flags */ + util_cpu_caps.has_tsc = (regs2[3] >> 4) & 1; /* 0x0000010 */ + util_cpu_caps.has_mmx = (regs2[3] >> 23) & 1; /* 0x0800000 */ + util_cpu_caps.has_sse = (regs2[3] >> 25) & 1; /* 0x2000000 */ + util_cpu_caps.has_sse2 = (regs2[3] >> 26) & 1; /* 0x4000000 */ + util_cpu_caps.has_sse3 = (regs2[2] >> 0) & 1; /* 0x0000001 */ + util_cpu_caps.has_ssse3 = (regs2[2] >> 9) & 1; /* 0x0000020 */ + util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1; + util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1; + util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1; + util_cpu_caps.has_avx = ((regs2[2] >> 28) & 1) && // AVX + ((regs2[2] >> 27) & 1) && // OSXSAVE + ((xgetbv() & 6) == 6); // XMM & YMM + util_cpu_caps.has_f16c = (regs2[2] >> 29) & 1; + util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ +#if defined(PIPE_ARCH_X86_64) + util_cpu_caps.has_daz = 1; +#else + util_cpu_caps.has_daz = util_cpu_caps.has_sse3 || + (util_cpu_caps.has_sse2 && sse2_has_daz()); +#endif + + cacheline = ((regs2[1] >> 8) & 0xFF) * 8; + if (cacheline > 0) + util_cpu_caps.cacheline = cacheline; + } + if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) { + uint32_t regs7[4]; + cpuid_count(0x00000007, 0x00000000, regs7); + util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1; + } + + if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) { + /* GenuineIntel */ + util_cpu_caps.has_intel = 1; + } + + cpuid(0x80000000, regs); + + if (regs[0] >= 0x80000001) { + + cpuid(0x80000001, regs2); + + util_cpu_caps.has_mmx |= (regs2[3] >> 23) & 1; + util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1; + util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1; + util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1; + + util_cpu_caps.has_xop = util_cpu_caps.has_avx && + ((regs2[2] >> 11) & 1); + } + + if (regs[0] >= 0x80000006) { + cpuid(0x80000006, regs2); + util_cpu_caps.cacheline = regs2[2] & 0xFF; + } + + if (!util_cpu_caps.has_sse) { + util_cpu_caps.has_sse2 = 0; + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; + util_cpu_caps.has_sse4_1 = 0; + } + } +#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + +#if defined(PIPE_ARCH_PPC) + check_os_altivec_support(); +#endif /* PIPE_ARCH_PPC */ + +#ifdef DEBUG + if (debug_get_option_dump_cpu()) { + debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); + + debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); + debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); + + debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); + debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); + debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); + debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); + debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); + debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); + debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); + debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); + debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2); + debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx); + debug_printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2); + debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c); + debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt); + debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); + debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); + debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop); + debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz); + } +#endif + + util_cpu_detect_initialized = TRUE; +} diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h new file mode 100644 index 0000000..5ccfc93 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -0,0 +1,87 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + ***************************************************************************/ + +/** + * @file + * CPU feature detection. + * + * @author Dennis Smit + * @author Based on the work of Eric Anholt + */ + +#ifndef _UTIL_CPU_DETECT_H +#define _UTIL_CPU_DETECT_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_config.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct util_cpu_caps { + unsigned nr_cpus; + + /* Feature flags */ + int x86_cpu_type; + unsigned cacheline; + + unsigned has_intel:1; + unsigned has_tsc:1; + unsigned has_mmx:1; + unsigned has_mmx2:1; + unsigned has_sse:1; + unsigned has_sse2:1; + unsigned has_sse3:1; + unsigned has_ssse3:1; + unsigned has_sse4_1:1; + unsigned has_sse4_2:1; + unsigned has_popcnt:1; + unsigned has_avx:1; + unsigned has_avx2:1; + unsigned has_f16c:1; + unsigned has_3dnow:1; + unsigned has_3dnow_ext:1; + unsigned has_xop:1; + unsigned has_altivec:1; + unsigned has_daz:1; +}; + +extern struct util_cpu_caps +util_cpu_caps; + +void util_cpu_detect(void); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _UTIL_CPU_DETECT_H */ diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index ae248e0..d79f31e 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -46,6 +46,12 @@ #include /* CHAR_BIT */ #include /* isalnum */ +#ifdef _WIN32 +#include +#include +#endif + + void _debug_vprintf(const char *format, va_list ap) { static char buf[4096] = {'\0'}; @@ -64,6 +70,32 @@ void _debug_vprintf(const char *format, va_list ap) } +void +debug_disable_error_message_boxes(void) +{ +#ifdef _WIN32 + /* When Windows' error message boxes are disabled for this process (as is + * typically the case when running tests in an automated fashion) we disable + * CRT message boxes too. + */ + UINT uMode = SetErrorMode(0); + SetErrorMode(uMode); + if (uMode & SEM_FAILCRITICALERRORS) { + /* Disable assertion failure message box. + * http://msdn.microsoft.com/en-us/library/sas1dkb2.aspx + */ + _set_error_mode(_OUT_TO_STDERR); +#ifdef _MSC_VER + /* Disable abort message box. + * http://msdn.microsoft.com/en-us/library/e631wekh.aspx + */ + _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); +#endif + } +#endif /* _WIN32 */ +} + + #ifdef DEBUG void debug_print_blob( const char *name, const void *blob, @@ -274,10 +306,7 @@ void _debug_assert_fail(const char *expr, const char *function) { _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); - if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) - os_abort(); - else - _debug_printf("continuing...\n"); + os_abort(); } @@ -337,10 +366,10 @@ debug_dump_flags(const struct debug_named_value *names, while(names->name) { if((names->value & value) == names->value) { if (!first) - util_strncat(output, "|", sizeof(output)); + util_strncat(output, "|", sizeof(output) - strlen(output) - 1); else first = 0; - util_strncat(output, names->name, sizeof(output) - 1); + util_strncat(output, names->name, sizeof(output) - strlen(output) - 1); output[sizeof(output) - 1] = '\0'; value &= ~names->value; } @@ -349,12 +378,12 @@ debug_dump_flags(const struct debug_named_value *names, if (value) { if (!first) - util_strncat(output, "|", sizeof(output)); + util_strncat(output, "|", sizeof(output) - strlen(output) - 1); else first = 0; util_snprintf(rest, sizeof(rest), "0x%08lx", value); - util_strncat(output, rest, sizeof(output) - 1); + util_strncat(output, rest, sizeof(output) - strlen(output) - 1); output[sizeof(output) - 1] = '\0'; } diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 9e4eb41..4c22fdf 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -138,6 +138,15 @@ void debug_print_format(const char *msg, unsigned fmt ); #endif +/** + * Disable interactive error message boxes. + * + * Should be called as soon as possible for effectiveness. + */ +void +debug_disable_error_message_boxes(void); + + /** * Hard-coded breakpoint. */ @@ -151,10 +160,17 @@ void debug_print_format(const char *msg, unsigned fmt ); long debug_get_num_option(const char *name, long dfault); +#ifdef _MSC_VER +__declspec(noreturn) +#endif void _debug_assert_fail(const char *expr, const char *file, unsigned line, - const char *function); + const char *function) +#ifdef __GNUC__ + __attribute__((__noreturn__)) +#endif +; /** @@ -169,7 +185,7 @@ void _debug_assert_fail(const char *expr, #ifdef DEBUG #define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) #else -#define debug_assert(expr) do { } while (0 && (expr)) +#define debug_assert(expr) (void)(0 && (expr)) #endif diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c new file mode 100644 index 0000000..c1ce408 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format.c @@ -0,0 +1,805 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format accessor functions. + * + * @author Jose Fonseca + */ + +#include "u_math.h" +#include "u_memory.h" +#include "u_format.h" +#include "u_format_s3tc.h" +#include "u_surface.h" + +#include "pipe/p_defines.h" + + +boolean +util_format_is_float(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + assert(desc); + if (!desc) { + return FALSE; + } + + i = util_format_get_first_non_void_channel(format); + if (i == -1) { + return FALSE; + } + + return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE; +} + + +/** Test if the format contains RGB, but not alpha */ +boolean +util_format_has_alpha(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + return (desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) && + desc->swizzle[3] != UTIL_FORMAT_SWIZZLE_1; +} + + +boolean +util_format_is_luminance(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + if ((desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1) { + return TRUE; + } + return FALSE; +} + +boolean +util_format_is_alpha(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + if ((desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X) { + return TRUE; + } + return FALSE; +} + +boolean +util_format_is_pure_integer(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i; + + /* Find the first non-void channel. */ + i = util_format_get_first_non_void_channel(format); + if (i == -1) + return FALSE; + + return desc->channel[i].pure_integer ? TRUE : FALSE; +} + +boolean +util_format_is_pure_sint(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i; + + i = util_format_get_first_non_void_channel(format); + if (i == -1) + return FALSE; + + return (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[i].pure_integer) ? TRUE : FALSE; +} + +boolean +util_format_is_pure_uint(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i; + + i = util_format_get_first_non_void_channel(format); + if (i == -1) + return FALSE; + + return (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED && desc->channel[i].pure_integer) ? TRUE : FALSE; +} + +/** + * Returns true if all non-void channels are normalized signed. + */ +boolean +util_format_is_snorm(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i; + + if (desc->is_mixed) + return FALSE; + + i = util_format_get_first_non_void_channel(format); + if (i == -1) + return FALSE; + + return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && + !desc->channel[i].pure_integer && + desc->channel[i].normalized; +} + +boolean +util_format_is_luminance_alpha(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + if ((desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_Y) { + return TRUE; + } + return FALSE; +} + + +boolean +util_format_is_intensity(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + if ((desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X) { + return TRUE; + } + return FALSE; +} + +boolean +util_format_is_subsampled_422(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + return desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED && + desc->block.width == 2 && + desc->block.height == 1 && + desc->block.bits == 32; +} + +boolean +util_format_is_supported(enum pipe_format format, unsigned bind) +{ + if (util_format_is_s3tc(format) && !util_format_s3tc_enabled) { + return FALSE; + } + +#ifndef TEXTURE_FLOAT_ENABLED + if ((bind & PIPE_BIND_RENDER_TARGET) && + format != PIPE_FORMAT_R9G9B9E5_FLOAT && + format != PIPE_FORMAT_R11G11B10_FLOAT && + util_format_is_float(format)) { + return FALSE; + } +#endif + + return TRUE; +} + + +/** + * Calculates the MRD for the depth format. MRD is used in depth bias + * for UNORM and unbound depth buffers. When the depth buffer is floating + * point, the depth bias calculation does not use the MRD. However, the + * default MRD will be 1.0 / ((1 << 24) - 1). + */ +double +util_get_depth_format_mrd(const struct util_format_description *desc) +{ + /* + * Depth buffer formats without a depth component OR scenarios + * without a bound depth buffer default to D24. + */ + double mrd = 1.0 / ((1 << 24) - 1); + unsigned depth_channel; + + assert(desc); + + /* + * Some depth formats do not store the depth component in the first + * channel, detect the format and adjust the depth channel. Get the + * swizzled depth component channel. + */ + depth_channel = desc->swizzle[0]; + + if (desc->channel[depth_channel].type == UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[depth_channel].normalized) { + int depth_bits; + + depth_bits = desc->channel[depth_channel].size; + mrd = 1.0 / ((1ULL << depth_bits) - 1); + } + + return mrd; +} + + +void +util_format_read_4f(enum pipe_format format, + float *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + const uint8_t *src_row; + float *dst_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); + dst_row = dst; + + format_desc->unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +void +util_format_write_4f(enum pipe_format format, + const float *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + uint8_t *dst_row; + const float *src_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8); + src_row = src; + + format_desc->pack_rgba_float(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +void +util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + const uint8_t *src_row; + uint8_t *dst_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); + dst_row = dst; + + format_desc->unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +void +util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + uint8_t *dst_row; + const uint8_t *src_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8); + src_row = src; + + format_desc->pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h); +} + +void +util_format_read_4ui(enum pipe_format format, + unsigned *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + const uint8_t *src_row; + uint32_t *dst_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); + dst_row = dst; + + format_desc->unpack_rgba_uint(dst_row, dst_stride, src_row, src_stride, w, h); +} + +void +util_format_write_4ui(enum pipe_format format, + const unsigned int *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + uint8_t *dst_row; + const uint32_t *src_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8); + src_row = src; + + format_desc->pack_rgba_uint(dst_row, dst_stride, src_row, src_stride, w, h); +} + +void +util_format_read_4i(enum pipe_format format, + int *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + const uint8_t *src_row; + int32_t *dst_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); + dst_row = dst; + + format_desc->unpack_rgba_sint(dst_row, dst_stride, src_row, src_stride, w, h); +} + +void +util_format_write_4i(enum pipe_format format, + const int *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + uint8_t *dst_row; + const int32_t *src_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8); + src_row = src; + + format_desc->pack_rgba_sint(dst_row, dst_stride, src_row, src_stride, w, h); +} + +boolean +util_is_format_compatible(const struct util_format_description *src_desc, + const struct util_format_description *dst_desc) +{ + unsigned chan; + + if (src_desc->format == dst_desc->format) { + return TRUE; + } + + if (src_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + dst_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + return FALSE; + } + + if (src_desc->block.bits != dst_desc->block.bits || + src_desc->nr_channels != dst_desc->nr_channels || + src_desc->colorspace != dst_desc->colorspace) { + return FALSE; + } + + for (chan = 0; chan < 4; ++chan) { + if (src_desc->channel[chan].size != + dst_desc->channel[chan].size) { + return FALSE; + } + } + + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = dst_desc->swizzle[chan]; + + if (swizzle < 4) { + if (src_desc->swizzle[chan] != swizzle) { + return FALSE; + } + if ((src_desc->channel[swizzle].type != + dst_desc->channel[swizzle].type) || + (src_desc->channel[swizzle].normalized != + dst_desc->channel[swizzle].normalized)) { + return FALSE; + } + } + } + + return TRUE; +} + + +boolean +util_format_fits_8unorm(const struct util_format_description *format_desc) +{ + unsigned chan; + + /* + * After linearized sRGB values require more than 8bits. + */ + + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + return FALSE; + } + + switch (format_desc->layout) { + + case UTIL_FORMAT_LAYOUT_S3TC: + /* + * These are straight forward. + */ + return TRUE; + case UTIL_FORMAT_LAYOUT_RGTC: + if (format_desc->format == PIPE_FORMAT_RGTC1_SNORM || + format_desc->format == PIPE_FORMAT_RGTC2_SNORM || + format_desc->format == PIPE_FORMAT_LATC1_SNORM || + format_desc->format == PIPE_FORMAT_LATC2_SNORM) + return FALSE; + return TRUE; + case UTIL_FORMAT_LAYOUT_BPTC: + if (format_desc->format == PIPE_FORMAT_BPTC_RGBA_UNORM) + return TRUE; + return FALSE; + + case UTIL_FORMAT_LAYOUT_PLAIN: + /* + * For these we can find a generic rule. + */ + + for (chan = 0; chan < format_desc->nr_channels; ++chan) { + switch (format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + break; + case UTIL_FORMAT_TYPE_UNSIGNED: + if (!format_desc->channel[chan].normalized || + format_desc->channel[chan].size > 8) { + return FALSE; + } + break; + default: + return FALSE; + } + } + return TRUE; + + default: + /* + * Handle all others on a case by case basis. + */ + + switch (format_desc->format) { + case PIPE_FORMAT_R1_UNORM: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + case PIPE_FORMAT_R8G8_B8G8_UNORM: + case PIPE_FORMAT_G8R8_G8B8_UNORM: + return TRUE; + + default: + return FALSE; + } + } +} + + +boolean +util_format_translate(enum pipe_format dst_format, + void *dst, unsigned dst_stride, + unsigned dst_x, unsigned dst_y, + enum pipe_format src_format, + const void *src, unsigned src_stride, + unsigned src_x, unsigned src_y, + unsigned width, unsigned height) +{ + const struct util_format_description *dst_format_desc; + const struct util_format_description *src_format_desc; + uint8_t *dst_row; + const uint8_t *src_row; + unsigned x_step, y_step; + unsigned dst_step; + unsigned src_step; + + dst_format_desc = util_format_description(dst_format); + src_format_desc = util_format_description(src_format); + + if (util_is_format_compatible(src_format_desc, dst_format_desc)) { + /* + * Trivial case. + */ + + util_copy_rect(dst, dst_format, dst_stride, dst_x, dst_y, + width, height, src, (int)src_stride, + src_x, src_y); + return TRUE; + } + + assert(dst_x % dst_format_desc->block.width == 0); + assert(dst_y % dst_format_desc->block.height == 0); + assert(src_x % src_format_desc->block.width == 0); + assert(src_y % src_format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + dst_y*dst_stride + dst_x*(dst_format_desc->block.bits/8); + src_row = (const uint8_t *)src + src_y*src_stride + src_x*(src_format_desc->block.bits/8); + + /* + * This works because all pixel formats have pixel blocks with power of two + * sizes. + */ + + y_step = MAX2(dst_format_desc->block.height, src_format_desc->block.height); + x_step = MAX2(dst_format_desc->block.width, src_format_desc->block.width); + assert(y_step % dst_format_desc->block.height == 0); + assert(y_step % src_format_desc->block.height == 0); + + dst_step = y_step / dst_format_desc->block.height * dst_stride; + src_step = y_step / src_format_desc->block.height * src_stride; + + /* + * TODO: double formats will loose precision + * TODO: Add a special case for formats that are mere swizzles of each other + */ + + if (src_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS || + dst_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + float *tmp_z = NULL; + uint8_t *tmp_s = NULL; + + assert(x_step == 1); + assert(y_step == 1); + + if (src_format_desc->unpack_z_float && + dst_format_desc->pack_z_float) { + tmp_z = MALLOC(width * sizeof *tmp_z); + } + + if (src_format_desc->unpack_s_8uint && + dst_format_desc->pack_s_8uint) { + tmp_s = MALLOC(width * sizeof *tmp_s); + } + + while (height--) { + if (tmp_z) { + src_format_desc->unpack_z_float(tmp_z, 0, src_row, src_stride, width, 1); + dst_format_desc->pack_z_float(dst_row, dst_stride, tmp_z, 0, width, 1); + } + + if (tmp_s) { + src_format_desc->unpack_s_8uint(tmp_s, 0, src_row, src_stride, width, 1); + dst_format_desc->pack_s_8uint(dst_row, dst_stride, tmp_s, 0, width, 1); + } + + dst_row += dst_step; + src_row += src_step; + } + + FREE(tmp_s); + + FREE(tmp_z); + + return TRUE; + } + + if (util_format_fits_8unorm(src_format_desc) || + util_format_fits_8unorm(dst_format_desc)) { + unsigned tmp_stride; + uint8_t *tmp_row; + + if (!src_format_desc->unpack_rgba_8unorm || + !dst_format_desc->pack_rgba_8unorm) { + return FALSE; + } + + tmp_stride = MAX2(width, x_step) * 4 * sizeof *tmp_row; + tmp_row = MALLOC(y_step * tmp_stride); + if (!tmp_row) + return FALSE; + + while (height >= y_step) { + src_format_desc->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + dst_format_desc->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); + + dst_row += dst_step; + src_row += src_step; + height -= y_step; + } + + if (height) { + src_format_desc->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, height); + dst_format_desc->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height); + } + + FREE(tmp_row); + } + else { + unsigned tmp_stride; + float *tmp_row; + + if (!src_format_desc->unpack_rgba_float || + !dst_format_desc->pack_rgba_float) { + return FALSE; + } + + tmp_stride = MAX2(width, x_step) * 4 * sizeof *tmp_row; + tmp_row = MALLOC(y_step * tmp_stride); + if (!tmp_row) + return FALSE; + + while (height >= y_step) { + src_format_desc->unpack_rgba_float(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + dst_format_desc->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); + + dst_row += dst_step; + src_row += src_step; + height -= y_step; + } + + if (height) { + src_format_desc->unpack_rgba_float(tmp_row, tmp_stride, src_row, src_stride, width, height); + dst_format_desc->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height); + } + + FREE(tmp_row); + } + return TRUE; +} + +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? + swz1[swz2[i]] : swz2[i]; + } +} + +void util_format_apply_color_swizzle(union pipe_color_union *dst, + const union pipe_color_union *src, + const unsigned char swz[4], + const boolean is_integer) +{ + unsigned c; + + if (is_integer) { + for (c = 0; c < 4; ++c) { + switch (swz[c]) { + case PIPE_SWIZZLE_RED: dst->ui[c] = src->ui[0]; break; + case PIPE_SWIZZLE_GREEN: dst->ui[c] = src->ui[1]; break; + case PIPE_SWIZZLE_BLUE: dst->ui[c] = src->ui[2]; break; + case PIPE_SWIZZLE_ALPHA: dst->ui[c] = src->ui[3]; break; + default: + dst->ui[c] = (swz[c] == PIPE_SWIZZLE_ONE) ? 1 : 0; + break; + } + } + } else { + for (c = 0; c < 4; ++c) { + switch (swz[c]) { + case PIPE_SWIZZLE_RED: dst->f[c] = src->f[0]; break; + case PIPE_SWIZZLE_GREEN: dst->f[c] = src->f[1]; break; + case PIPE_SWIZZLE_BLUE: dst->f[c] = src->f[2]; break; + case PIPE_SWIZZLE_ALPHA: dst->f[c] = src->f[3]; break; + default: + dst->f[c] = (swz[c] == PIPE_SWIZZLE_ONE) ? 1.0f : 0.0f; + break; + } + } + } +} + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + if (swz[i] <= UTIL_FORMAT_SWIZZLE_W) + dst[i] = src[swz[i]]; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_0) + dst[i] = 0; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_1) + dst[i] = 1; + } +} + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + switch (swz[i]) { + case UTIL_FORMAT_SWIZZLE_X: + dst[0] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Y: + dst[1] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + dst[2] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + dst[3] = src[i]; + break; + } + } +} diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 5f86e2d..621574c 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -78,10 +78,15 @@ enum util_format_layout { */ UTIL_FORMAT_LAYOUT_ETC = 6, + /** + * BC6/7 Texture Compression + */ + UTIL_FORMAT_LAYOUT_BPTC = 7, + /** * Everything else that doesn't fit in any of the above layouts. */ - UTIL_FORMAT_LAYOUT_OTHER = 7 + UTIL_FORMAT_LAYOUT_OTHER = 8 }; @@ -475,6 +480,7 @@ util_format_is_compressed(enum pipe_format format) case UTIL_FORMAT_LAYOUT_S3TC: case UTIL_FORMAT_LAYOUT_RGTC: case UTIL_FORMAT_LAYOUT_ETC: + case UTIL_FORMAT_LAYOUT_BPTC: /* XXX add other formats in the future */ return TRUE; default: @@ -655,6 +661,8 @@ util_format_has_alpha(enum pipe_format format); boolean util_format_is_luminance(enum pipe_format format); +boolean +util_format_is_alpha(enum pipe_format format); boolean util_format_is_luminance_alpha(enum pipe_format format); @@ -663,6 +671,9 @@ util_format_is_luminance_alpha(enum pipe_format format); boolean util_format_is_intensity(enum pipe_format format); +boolean +util_format_is_subsampled_422(enum pipe_format format); + boolean util_format_is_pure_integer(enum pipe_format format); @@ -906,6 +917,10 @@ util_format_srgb(enum pipe_format format) return PIPE_FORMAT_DXT3_SRGBA; case PIPE_FORMAT_DXT5_RGBA: return PIPE_FORMAT_DXT5_SRGBA; + case PIPE_FORMAT_B5G6R5_UNORM: + return PIPE_FORMAT_B5G6R5_SRGB; + case PIPE_FORMAT_BPTC_RGBA_UNORM: + return PIPE_FORMAT_BPTC_SRGBA; default: return PIPE_FORMAT_NONE; } @@ -949,6 +964,10 @@ util_format_linear(enum pipe_format format) return PIPE_FORMAT_DXT3_RGBA; case PIPE_FORMAT_DXT5_SRGBA: return PIPE_FORMAT_DXT5_RGBA; + case PIPE_FORMAT_B5G6R5_SRGB: + return PIPE_FORMAT_B5G6R5_UNORM; + case PIPE_FORMAT_BPTC_SRGBA: + return PIPE_FORMAT_BPTC_RGBA_UNORM; default: return format; } @@ -1060,8 +1079,7 @@ util_format_luminance_to_red(enum pipe_format format) return PIPE_FORMAT_RGTC1_SNORM; case PIPE_FORMAT_L4A4_UNORM: - /* XXX A4R4 is defined as x00y in u_format.csv */ - return PIPE_FORMAT_A4R4_UNORM; + return PIPE_FORMAT_R4A4_UNORM; case PIPE_FORMAT_L8A8_UNORM: return PIPE_FORMAT_R8A8_UNORM; @@ -1190,7 +1208,7 @@ util_format_write_4i(enum pipe_format format, boolean util_format_fits_8unorm(const struct util_format_description *format_desc); -void +boolean util_format_translate(enum pipe_format dst_format, void *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y, diff --git a/src/gallium/auxiliary/util/u_format_etc.c b/src/gallium/auxiliary/util/u_format_etc.c new file mode 100644 index 0000000..c97287c --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_etc.c @@ -0,0 +1,33 @@ +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "util/u_math.h" +#include "u_format_etc.h" + +/* define etc1_parse_block and etc. */ + +void +util_format_etc1_rgb8_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_etc1_rgb8_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + assert(0); +} + +void +util_format_etc1_rgb8_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_etc1_rgb8_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + assert(0); +} + +void +util_format_etc1_rgb8_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} diff --git a/src/gallium/auxiliary/util/u_format_latc.c b/src/gallium/auxiliary/util/u_format_latc.c new file mode 100644 index 0000000..717974e --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_latc.c @@ -0,0 +1,167 @@ +/************************************************************************** + * + * Copyright (C) 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include "u_math.h" +#include "u_format.h" +#include "u_format_rgtc.h" +#include "u_format_latc.h" +#include "util/rgtc.h" + +void +util_format_latc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + +void +util_format_latc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_unorm_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, + unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_unorm_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_latc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + +void +util_format_latc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_latc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_latc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + + +void +util_format_latc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + +void +util_format_latc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc2_unorm_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc2_unorm_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 3); +} + +void +util_format_latc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_latc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + + +void +util_format_latc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_latc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 3); +} + +void +util_format_latc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c new file mode 100644 index 0000000..85001c1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_other.c @@ -0,0 +1,472 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "u_math.h" +#include "u_format_other.h" +#include "u_format_rgb9e5.h" +#include "u_format_r11g11b10f.h" + + +void +util_format_r9g9b9e5_float_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = *(const uint32_t *)src; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + rgb9e5_to_float3(value, dst); + dst[3] = 1; /* a */ + src += 4; + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + const float *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 1) { + uint32_t value = float3_to_rgb9e5(src); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *(uint32_t *)dst = value; + src += 4; + dst += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_r9g9b9e5_float_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint32_t value = *(const uint32_t *)src; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + rgb9e5_to_float3(value, dst); + dst[3] = 1; /* a */ +} + + +void +util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + float p[3]; + for(y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = *(const uint32_t *)src; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + rgb9e5_to_float3(value, p); + dst[0] = float_to_ubyte(p[0]); /* r */ + dst[1] = float_to_ubyte(p[1]); /* g */ + dst[2] = float_to_ubyte(p[2]); /* b */ + dst[3] = 255; /* a */ + src += 4; + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + float p[3]; + for(y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 1) { + uint32_t value; + p[0] = ubyte_to_float(src[0]); + p[1] = ubyte_to_float(src[1]); + p[2] = ubyte_to_float(src[2]); + value = float3_to_rgb9e5(p); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *(uint32_t *)dst = value; + src += 4; + dst += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r11g11b10_float_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = *(const uint32_t *)src; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + r11g11b10f_to_float3(value, dst); + dst[3] = 1; /* a */ + src += 4; + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_r11g11b10_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + const float *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 1) { + uint32_t value = float3_to_r11g11b10f(src); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *(uint32_t *)dst = value; + src += 4; + dst += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_r11g11b10_float_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint32_t value = *(const uint32_t *)src; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + r11g11b10f_to_float3(value, dst); + dst[3] = 1; /* a */ +} + + +void +util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + float p[3]; + for(y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = *(const uint32_t *)src; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + r11g11b10f_to_float3(value, p); + dst[0] = float_to_ubyte(p[0]); /* r */ + dst[1] = float_to_ubyte(p[1]); /* g */ + dst[2] = float_to_ubyte(p[2]); /* b */ + dst[3] = 255; /* a */ + src += 4; + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + float p[3]; + for(y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 1) { + uint32_t value; + p[0] = ubyte_to_float(src[0]); + p[1] = ubyte_to_float(src[1]); + p[2] = ubyte_to_float(src[2]); + value = float3_to_r11g11b10f(p); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *(uint32_t *)dst = value; + src += 4; + dst += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + +} + + +void +util_format_r1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ +} + + +/* + * PIPE_FORMAT_R8G8Bx_SNORM + * + * A.k.a. D3DFMT_CxV8U8 + */ + +static uint8_t +r8g8bx_derive(int16_t r, int16_t g) +{ + /* Derive blue from red and green components. + * Apparently, we must always use integers to perform calculations, + * otherwise the results won't match D3D's CxV8U8 definition. + */ + return (uint8_t)sqrtf(0x7f * 0x7f - r * r - g * g) * 0xff / 0x7f; +} + +void +util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for(y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; x += 1) { + uint16_t value = *src++; + int16_t r, g; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; + + dst[0] = (float)(r * (1.0f/0x7f)); /* r */ + dst[1] = (float)(g * (1.0f/0x7f)); /* g */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; x += 1) { + uint16_t value = *src++; + int16_t r, g; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; + + dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ + dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ + dst[2] = r8g8bx_derive(r, g); /* b */ + dst[3] = 255; /* a */ + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + const float *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; x += 1) { + uint16_t value = 0; + + value |= (uint16_t)(((int8_t)(CLAMP(src[0], -1, 1) * 0x7f)) & 0xff) ; + value |= (uint16_t)((((int8_t)(CLAMP(src[1], -1, 1) * 0x7f)) & 0xff) << 8) ; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + + *dst++ = value; + + src += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for(y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; x += 1) { + uint16_t value = 0; + + value |= src[0] >> 1; + value |= (src[1] >> 1) << 8; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + + *dst++ = value; + + src += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint16_t value = *(const uint16_t *)src; + int16_t r, g; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; + + dst[0] = r * (1.0f/0x7f); /* r */ + dst[1] = g * (1.0f/0x7f); /* g */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ + dst[3] = 1.0f; /* a */ +} diff --git a/src/gallium/auxiliary/util/u_format_r11g11b10f.h b/src/gallium/auxiliary/util/u_format_r11g11b10f.h new file mode 100644 index 0000000..57516c3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2011 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* Based on code from The OpenGL Programming Guide / 7th Edition, Appendix J. + * Available here: http://www.opengl-redbook.com/appendices/ + * The algorithm in the book contains a bug though, which is fixed in the code + * below. + */ + +#define UF11(e, m) ((e << 6) | (m)) +#define UF11_EXPONENT_BIAS 15 +#define UF11_EXPONENT_BITS 0x1F +#define UF11_EXPONENT_SHIFT 6 +#define UF11_MANTISSA_BITS 0x3F +#define UF11_MANTISSA_SHIFT (23 - UF11_EXPONENT_SHIFT) +#define UF11_MAX_EXPONENT (UF11_EXPONENT_BITS << UF11_EXPONENT_SHIFT) + +#define UF10(e, m) ((e << 5) | (m)) +#define UF10_EXPONENT_BIAS 15 +#define UF10_EXPONENT_BITS 0x1F +#define UF10_EXPONENT_SHIFT 5 +#define UF10_MANTISSA_BITS 0x1F +#define UF10_MANTISSA_SHIFT (23 - UF10_EXPONENT_SHIFT) +#define UF10_MAX_EXPONENT (UF10_EXPONENT_BITS << UF10_EXPONENT_SHIFT) + +#define F32_INFINITY 0x7f800000 + +static INLINE unsigned f32_to_uf11(float val) +{ + union { + float f; + uint32_t ui; + } f32 = {val}; + + uint16_t uf11 = 0; + + /* Decode little-endian 32-bit floating-point value */ + int sign = (f32.ui >> 16) & 0x8000; + /* Map exponent to the range [-127,128] */ + int exponent = ((f32.ui >> 23) & 0xff) - 127; + int mantissa = f32.ui & 0x007fffff; + + if (exponent == 128) { /* Infinity or NaN */ + /* From the GL_EXT_packed_float spec: + * + * "Additionally: negative infinity is converted to zero; positive + * infinity is converted to positive infinity; and both positive and + * negative NaN are converted to positive NaN." + */ + uf11 = UF11_MAX_EXPONENT; + if (mantissa) { + uf11 |= 1; /* NaN */ + } else { + if (sign) + uf11 = 0; /* 0.0 */ + } + } else if (sign) { + return 0; + } else if (val > 65024.0f) { + /* From the GL_EXT_packed_float spec: + * + * "Likewise, finite positive values greater than 65024 (the maximum + * finite representable unsigned 11-bit floating-point value) are + * converted to 65024." + */ + uf11 = UF11(30, 63); + } + else if (exponent > -15) { /* Representable value */ + exponent += UF11_EXPONENT_BIAS; + mantissa >>= UF11_MANTISSA_SHIFT; + uf11 = exponent << UF11_EXPONENT_SHIFT | mantissa; + } + + return uf11; +} + +static INLINE float uf11_to_f32(uint16_t val) +{ + union { + float f; + uint32_t ui; + } f32; + + int exponent = (val & 0x07c0) >> UF11_EXPONENT_SHIFT; + int mantissa = (val & 0x003f); + + f32.f = 0.0; + + if (exponent == 0) { + if (mantissa != 0) { + const float scale = 1.0 / (1 << 20); + f32.f = scale * mantissa; + } + } + else if (exponent == 31) { + f32.ui = F32_INFINITY | mantissa; + } + else { + float scale, decimal; + exponent -= 15; + if (exponent < 0) { + scale = 1.0f / (1 << -exponent); + } + else { + scale = (float) (1 << exponent); + } + decimal = 1.0f + (float) mantissa / 64; + f32.f = scale * decimal; + } + + return f32.f; +} + +static INLINE unsigned f32_to_uf10(float val) +{ + union { + float f; + uint32_t ui; + } f32 = {val}; + + uint16_t uf10 = 0; + + /* Decode little-endian 32-bit floating-point value */ + int sign = (f32.ui >> 16) & 0x8000; + /* Map exponent to the range [-127,128] */ + int exponent = ((f32.ui >> 23) & 0xff) - 127; + int mantissa = f32.ui & 0x007fffff; + + if (exponent == 128) { + /* From the GL_EXT_packed_float spec: + * + * "Additionally: negative infinity is converted to zero; positive + * infinity is converted to positive infinity; and both positive and + * negative NaN are converted to positive NaN." + */ + uf10 = UF10_MAX_EXPONENT; + if (mantissa) { + uf10 |= 1; /* NaN */ + } else { + if (sign) + uf10 = 0; /* 0.0 */ + } + } else if (sign) { + return 0; + } else if (val > 64512.0f) { + /* From the GL_EXT_packed_float spec: + * + * "Likewise, finite positive values greater than 64512 (the maximum + * finite representable unsigned 10-bit floating-point value) are + * converted to 64512." + */ + uf10 = UF10(30, 31); + } + else if (exponent > -15) { /* Representable value */ + exponent += UF10_EXPONENT_BIAS; + mantissa >>= UF10_MANTISSA_SHIFT; + uf10 = exponent << UF10_EXPONENT_SHIFT | mantissa; + } + + return uf10; +} + +static INLINE float uf10_to_f32(uint16_t val) +{ + union { + float f; + uint32_t ui; + } f32; + + int exponent = (val & 0x03e0) >> UF10_EXPONENT_SHIFT; + int mantissa = (val & 0x001f); + + f32.f = 0.0; + + if (exponent == 0) { + if (mantissa != 0) { + const float scale = 1.0 / (1 << 20); + f32.f = scale * mantissa; + } + } + else if (exponent == 31) { + f32.ui = F32_INFINITY | mantissa; + } + else { + float scale, decimal; + exponent -= 15; + if (exponent < 0) { + scale = 1.0f / (1 << -exponent); + } + else { + scale = (float) (1 << exponent); + } + decimal = 1.0f + (float) mantissa / 32; + f32.f = scale * decimal; + } + + return f32.f; +} + +static INLINE unsigned float3_to_r11g11b10f(const float rgb[3]) +{ + return ( f32_to_uf11(rgb[0]) & 0x7ff) | + ((f32_to_uf11(rgb[1]) & 0x7ff) << 11) | + ((f32_to_uf10(rgb[2]) & 0x3ff) << 22); +} + +static INLINE void r11g11b10f_to_float3(unsigned rgb, float retval[3]) +{ + retval[0] = uf11_to_f32( rgb & 0x7ff); + retval[1] = uf11_to_f32((rgb >> 11) & 0x7ff); + retval[2] = uf10_to_f32((rgb >> 22) & 0x3ff); +} diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h new file mode 100644 index 0000000..c2a3f6f --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2011 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* Copied from EXT_texture_shared_exponent and edited. */ + +#ifndef RGB9E5_H +#define RGB9E5_H + +#include +#include + +#define RGB9E5_EXPONENT_BITS 5 +#define RGB9E5_MANTISSA_BITS 9 +#define RGB9E5_EXP_BIAS 15 +#define RGB9E5_MAX_VALID_BIASED_EXP 31 + +#define MAX_RGB9E5_EXP (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS) +#define RGB9E5_MANTISSA_VALUES (1< 0.0) { + if (x >= MAX_RGB9E5) { + return MAX_RGB9E5; + } else { + return x; + } + } else { + /* NaN gets here too since comparisons with NaN always fail! */ + return 0.0; + } +} + +/* Ok, FloorLog2 is not correct for the denorm and zero values, but we + are going to do a max of this value with the minimum rgb9e5 exponent + that will hide these problem cases. */ +static INLINE int rgb9e5_FloorLog2(float x) +{ + float754 f; + + f.value = x; + return (f.field.biasedexponent - 127); +} + +static INLINE unsigned float3_to_rgb9e5(const float rgb[3]) +{ + rgb9e5 retval; + float maxrgb; + int rm, gm, bm; + float rc, gc, bc; + int exp_shared, maxm; + double denom; + + rc = rgb9e5_ClampRange(rgb[0]); + gc = rgb9e5_ClampRange(rgb[1]); + bc = rgb9e5_ClampRange(rgb[2]); + + maxrgb = MAX3(rc, gc, bc); + exp_shared = MAX2(-RGB9E5_EXP_BIAS-1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS; + assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); + assert(exp_shared >= 0); + /* This pow function could be replaced by a table. */ + denom = pow(2, exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS); + + maxm = (int) floor(maxrgb / denom + 0.5); + if (maxm == MAX_RGB9E5_MANTISSA+1) { + denom *= 2; + exp_shared += 1; + assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); + } else { + assert(maxm <= MAX_RGB9E5_MANTISSA); + } + + rm = (int) floor(rc / denom + 0.5); + gm = (int) floor(gc / denom + 0.5); + bm = (int) floor(bc / denom + 0.5); + + assert(rm <= MAX_RGB9E5_MANTISSA); + assert(gm <= MAX_RGB9E5_MANTISSA); + assert(bm <= MAX_RGB9E5_MANTISSA); + assert(rm >= 0); + assert(gm >= 0); + assert(bm >= 0); + + retval.field.r = rm; + retval.field.g = gm; + retval.field.b = bm; + retval.field.biasedexponent = exp_shared; + + return retval.raw; +} + +static INLINE void rgb9e5_to_float3(unsigned rgb, float retval[3]) +{ + rgb9e5 v; + int exponent; + float scale; + + v.raw = rgb; + exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS; + scale = (float) pow(2, exponent); + + retval[0] = v.field.r * scale; + retval[1] = v.field.g * scale; + retval[2] = v.field.b * scale; +} + +#endif diff --git a/src/gallium/auxiliary/util/u_format_rgtc.c b/src/gallium/auxiliary/util/u_format_rgtc.c new file mode 100644 index 0000000..18550e4 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_rgtc.c @@ -0,0 +1,171 @@ +/************************************************************************** + * + * Copyright (C) 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include "u_math.h" +#include "u_format.h" +#include "u_format_rgtc.h" +#include "util/rgtc.h" + +void +util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + +void +util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, + unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + +void +util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + + +void +util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + +void +util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rxtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off) +{ +} + +void +util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 1); +} + +void +util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + + +void +util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ +} + +void +util_format_rxtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off) +{ +} + +void +util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 1); +} + +void +util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ +} + diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c new file mode 100644 index 0000000..318643b --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -0,0 +1,732 @@ +/************************************************************************** + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (c) 2008 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "u_math.h" +#include "u_format.h" +#include "u_format_s3tc.h" +#include "u_format_srgb.h" + + +static void +util_format_dxt1_rgb_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst) +{ + assert(0); +} + + +static void +util_format_dxt1_rgba_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ) +{ + assert(0); +} + + +static void +util_format_dxt3_rgba_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ) +{ + assert(0); +} + + +static void +util_format_dxt5_rgba_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ) +{ + assert(0); +} + + +static void +util_format_dxtn_pack_stub(int src_comps, + int width, int height, + const uint8_t *src, + enum util_format_dxtn dst_format, + uint8_t *dst, + int dst_stride) +{ + assert(0); +} + + +boolean util_format_s3tc_enabled = FALSE; + +util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub; +util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub; +util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub; +util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub; + +util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub; + + +void +util_format_s3tc_init(void) +{ +} + + +/* + * Pixel fetch. + */ + +void +util_format_dxt1_rgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgb_fetch(0, src, i, j, dst); +} + +void +util_format_dxt1_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgba_fetch(0, src, i, j, dst); +} + +void +util_format_dxt3_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt3_rgba_fetch(0, src, i, j, dst); +} + +void +util_format_dxt5_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt5_rgba_fetch(0, src, i, j, dst); +} + +void +util_format_dxt1_rgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgb_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = 1.0; +} + +void +util_format_dxt1_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt3_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt3_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt5_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + + +/* + * Block decompression. + */ + +static INLINE void +util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height, + util_format_dxtn_fetch_t fetch, + unsigned block_size, boolean srgb) +{ + const unsigned bw = 4, bh = 4, comps = 4; + unsigned x, y, i, j; + for(y = 0; y < height; y += bh) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + fetch(0, src, i, j, dst); + if (srgb) { + dst[0] = util_format_srgb_to_linear_8unorm(dst[0]); + dst[1] = util_format_srgb_to_linear_8unorm(dst[1]); + dst[2] = util_format_srgb_to_linear_8unorm(dst[2]); + } + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, + 8, FALSE); +} + +void +util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, + 8, FALSE); +} + +void +util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, + 16, FALSE); +} + +void +util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, + 16, FALSE); +} + +static INLINE void +util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height, + util_format_dxtn_fetch_t fetch, + unsigned block_size, boolean srgb) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp[4]; + fetch(0, src, i, j, tmp); + if (srgb) { + dst[0] = util_format_srgb_8unorm_to_linear_float(tmp[0]); + dst[1] = util_format_srgb_8unorm_to_linear_float(tmp[1]); + dst[2] = util_format_srgb_8unorm_to_linear_float(tmp[2]); + } + else { + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + } + dst[3] = ubyte_to_float(tmp[3]); + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, + 8, FALSE); +} + +void +util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, + 8, FALSE); +} + +void +util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, + 16, FALSE); +} + +void +util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, + 16, FALSE); +} + + +/* + * Block compression. + */ + +static INLINE void +util_format_dxtn_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height, + enum util_format_dxtn format, + unsigned block_size, boolean srgb) +{ + const unsigned bw = 4, bh = 4, comps = 4; + unsigned x, y, i, j, k; + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t src_tmp; + for(k = 0; k < 3; ++k) { + src_tmp = src[(y + j)*src_stride/sizeof(*src) + (x+i)*comps + k]; + if (srgb) { + tmp[j][i][k] = util_format_linear_to_srgb_8unorm(src_tmp); + } + else { + tmp[j][i][k] = src_tmp; + } + } + /* for sake of simplicity there's an unneeded 4th component for dxt1_rgb */ + tmp[j][i][3] = src[(y + j)*src_stride/sizeof(*src) + (x+i)*comps + 3]; + } + } + /* even for dxt1_rgb have 4 src comps */ + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], format, dst, 0); + dst += block_size; + } + dst_row += dst_stride / sizeof(*dst_row); + } + +} + +void +util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT1_RGB, + 8, FALSE); +} + +void +util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT1_RGBA, + 8, FALSE); +} + +void +util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT3_RGBA, + 16, FALSE); +} + +void +util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT5_RGBA, + 16, FALSE); +} + +static INLINE void +util_format_dxtn_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height, + enum util_format_dxtn format, + unsigned block_size, boolean srgb) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float src_tmp; + for(k = 0; k < 3; ++k) { + src_tmp = src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]; + if (srgb) { + tmp[j][i][k] = util_format_linear_float_to_srgb_8unorm(src_tmp); + } + else { + tmp[j][i][k] = float_to_ubyte(src_tmp); + } + } + /* for sake of simplicity there's an unneeded 4th component for dxt1_rgb */ + src_tmp = src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + 3]; + tmp[j][i][3] = float_to_ubyte(src_tmp); + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], format, dst, 0); + dst += block_size; + } + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT1_RGB, + 8, FALSE); +} + +void +util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT1_RGBA, + 8, FALSE); +} + +void +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT3_RGBA, + 16, FALSE); +} + +void +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src, src_stride, + width, height, UTIL_FORMAT_DXT5_RGBA, + 16, FALSE); +} + + +/* + * SRGB variants. + */ + +void +util_format_dxt1_srgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgb_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_to_linear_8unorm(tmp[0]); + dst[1] = util_format_srgb_to_linear_8unorm(tmp[1]); + dst[2] = util_format_srgb_to_linear_8unorm(tmp[2]); + dst[3] = 255; +} + +void +util_format_dxt1_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_to_linear_8unorm(tmp[0]); + dst[1] = util_format_srgb_to_linear_8unorm(tmp[1]); + dst[2] = util_format_srgb_to_linear_8unorm(tmp[2]); + dst[3] = tmp[3]; +} + +void +util_format_dxt3_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt3_rgba_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_to_linear_8unorm(tmp[0]); + dst[1] = util_format_srgb_to_linear_8unorm(tmp[1]); + dst[2] = util_format_srgb_to_linear_8unorm(tmp[2]); + dst[3] = tmp[3]; +} + +void +util_format_dxt5_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt5_rgba_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_to_linear_8unorm(tmp[0]); + dst[1] = util_format_srgb_to_linear_8unorm(tmp[1]); + dst[2] = util_format_srgb_to_linear_8unorm(tmp[2]); + dst[3] = tmp[3]; +} + +void +util_format_dxt1_srgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgb_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_8unorm_to_linear_float(tmp[0]); + dst[1] = util_format_srgb_8unorm_to_linear_float(tmp[1]); + dst[2] = util_format_srgb_8unorm_to_linear_float(tmp[2]); + dst[3] = 1.0f; +} + +void +util_format_dxt1_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_8unorm_to_linear_float(tmp[0]); + dst[1] = util_format_srgb_8unorm_to_linear_float(tmp[1]); + dst[2] = util_format_srgb_8unorm_to_linear_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt3_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt3_rgba_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_8unorm_to_linear_float(tmp[0]); + dst[1] = util_format_srgb_8unorm_to_linear_float(tmp[1]); + dst[2] = util_format_srgb_8unorm_to_linear_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt5_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt5_rgba_fetch(0, src, i, j, tmp); + dst[0] = util_format_srgb_8unorm_to_linear_float(tmp[0]); + dst[1] = util_format_srgb_8unorm_to_linear_float(tmp[1]); + dst[2] = util_format_srgb_8unorm_to_linear_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt1_srgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, + 8, TRUE); +} + +void +util_format_dxt1_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, + 8, TRUE); +} + +void +util_format_dxt3_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, + 16, TRUE); +} + +void +util_format_dxt5_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, + 16, TRUE); +} + +void +util_format_dxt1_srgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, + 8, TRUE); +} + +void +util_format_dxt1_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, + 8, TRUE); +} + +void +util_format_dxt3_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, + 16, TRUE); +} + +void +util_format_dxt5_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, + 16, TRUE); +} + +void +util_format_dxt1_srgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT1_RGB, + 8, TRUE); +} + +void +util_format_dxt1_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT1_RGBA, + 8, TRUE); +} + +void +util_format_dxt3_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT3_RGBA, + 16, TRUE); +} + +void +util_format_dxt5_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT5_RGBA, + 16, TRUE); +} + +void +util_format_dxt1_srgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT1_RGB, + 8, TRUE); +} + +void +util_format_dxt1_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT1_RGBA, + 8, TRUE); +} + +void +util_format_dxt3_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT3_RGBA, + 16, TRUE); +} + +void +util_format_dxt5_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxtn_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, + width, height, UTIL_FORMAT_DXT5_RGBA, + 16, TRUE); +} + diff --git a/src/gallium/auxiliary/util/u_format_yuv.c b/src/gallium/auxiliary/util/u_format_yuv.c new file mode 100644 index 0000000..891d99c --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_yuv.c @@ -0,0 +1,1193 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * YUV and RGB subsampled formats conversion. + * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" +#include "util/u_format_yuv.h" + + +void +util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + float r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ubyte_to_float((value >> 0) & 0xff); + g0 = ubyte_to_float((value >> 8) & 0xff); + b = ubyte_to_float((value >> 16) & 0xff); + g1 = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ubyte_to_float((value >> 0) & 0xff); + g0 = ubyte_to_float((value >> 8) & 0xff); + b = ubyte_to_float((value >> 16) & 0xff); + g1 = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = (value >> 0) & 0xff; + g0 = (value >> 8) & 0xff; + b = (value >> 16) & 0xff; + g1 = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = (value >> 0) & 0xff; + g0 = (value >> 8) & 0xff; + b = (value >> 16) & 0xff; + g1 = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + float r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = 0.5f*(src[0] + src[4]); + g0 = src[1]; + g1 = src[5]; + b = 0.5f*(src[2] + src[6]); + + value = float_to_ubyte(r); + value |= float_to_ubyte(g0) << 8; + value |= float_to_ubyte(b) << 16; + value |= float_to_ubyte(g1) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = float_to_ubyte(r); + value |= float_to_ubyte(g0) << 8; + value |= float_to_ubyte(b) << 16; + value |= float_to_ubyte(g1) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint32_t r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = (src[0] + src[4] + 1) >> 1; + g0 = src[1]; + g1 = src[5]; + b = (src[2] + src[6] + 1) >> 1; + + value = r; + value |= g0 << 8; + value |= b << 16; + value |= g1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = r; + value |= g0 << 8; + value |= b << 16; + value |= g1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + assert(i < 2); + assert(j < 1); + + dst[0] = ubyte_to_float(src[0]); /* r */ + dst[1] = ubyte_to_float(src[1 + 2*i]); /* g */ + dst[2] = ubyte_to_float(src[2]); /* b */ + dst[3] = 1.0f; /* a */ +} + + +void +util_format_g8r8_g8b8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + float r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = ubyte_to_float((value >> 0) & 0xff); + r = ubyte_to_float((value >> 8) & 0xff); + g1 = ubyte_to_float((value >> 16) & 0xff); + b = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = ubyte_to_float((value >> 0) & 0xff); + r = ubyte_to_float((value >> 8) & 0xff); + g1 = ubyte_to_float((value >> 16) & 0xff); + b = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = (value >> 0) & 0xff; + r = (value >> 8) & 0xff; + g1 = (value >> 16) & 0xff; + b = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = (value >> 0) & 0xff; + r = (value >> 8) & 0xff; + g1 = (value >> 16) & 0xff; + b = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + float r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = 0.5f*(src[0] + src[4]); + g0 = src[1]; + g1 = src[5]; + b = 0.5f*(src[2] + src[6]); + + value = float_to_ubyte(g0); + value |= float_to_ubyte(r) << 8; + value |= float_to_ubyte(g1) << 16; + value |= float_to_ubyte(b) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = float_to_ubyte(g0); + value |= float_to_ubyte(r) << 8; + value |= float_to_ubyte(g1) << 16; + value |= float_to_ubyte(b) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint32_t r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = (src[0] + src[4] + 1) >> 1; + g0 = src[1]; + g1 = src[5]; + b = (src[2] + src[6] + 1) >> 1; + + value = g0; + value |= r << 8; + value |= g1 << 16; + value |= b << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = g0; + value |= r << 8; + value |= g1 << 16; + value |= b << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + assert(i < 2); + assert(j < 1); + + dst[0] = ubyte_to_float(src[1]); /* r */ + dst[1] = ubyte_to_float(src[0 + 2*i]); /* g */ + dst[2] = ubyte_to_float(src[3]); /* b */ + dst[3] = 1.0f; /* a */ +} + + +void +util_format_uyvy_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + + util_format_yuv_to_rgb_float(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_uyvy_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + + util_format_yuv_to_rgb_8unorm(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_uyvy_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_float_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_uyvy_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_8unorm_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_uyvy_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint8_t y, u, v; + + assert(i < 2); + assert(j < 1); + + y = src[1 + i*2]; + u = src[0]; + v = src[2]; + + util_format_yuv_to_rgb_float(y, u, v, &dst[0], &dst[1], &dst[2]); + + dst[3] = 1.0f; +} + + +void +util_format_yuyv_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + + util_format_yuv_to_rgb_float(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_yuyv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + + util_format_yuv_to_rgb_8unorm(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_yuyv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_float_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_yuyv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_8unorm_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint8_t y, u, v; + + assert(i < 2); + assert(j < 1); + + y = src[0 + i*2]; + u = src[1]; + v = src[3]; + + util_format_yuv_to_rgb_float(y, u, v, &dst[0], &dst[1], &dst[2]); + + dst[3] = 1.0f; +} + +/* XXX: Stubbed for now */ +void +util_format_yv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv12_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_yv16_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_yv16_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_iyuv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_iyuv_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_nv12_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv12_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} +void +util_format_nv21_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} +void +util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} + +void +util_format_r8g8_r8b8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_r8g8_r8b8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_r8g8_r8b8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_r8g8_r8b8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_r8g8_r8b8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} + +void +util_format_g8r8_b8r8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_g8r8_b8r8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_g8r8_b8r8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_g8r8_b8r8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) {} + +void +util_format_g8r8_b8r8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) {} diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c new file mode 100644 index 0000000..ed45c52 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -0,0 +1,973 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "u_debug.h" +#include "u_math.h" +#include "u_format_zs.h" + + +/* + * z32_unorm conversion functions + */ + +static INLINE uint16_t +z32_unorm_to_z16_unorm(uint32_t z) +{ + /* z * 0xffff / 0xffffffff */ + return z >> 16; +} + +static INLINE uint32_t +z16_unorm_to_z32_unorm(uint16_t z) +{ + /* z * 0xffffffff / 0xffff */ + return (z << 16) | z; +} + +static INLINE uint32_t +z32_unorm_to_z24_unorm(uint32_t z) +{ + /* z * 0xffffff / 0xffffffff */ + return z >> 8; +} + +static INLINE uint32_t +z24_unorm_to_z32_unorm(uint32_t z) +{ + /* z * 0xffffffff / 0xffffff */ + return (z << 8) | (z >> 16); +} + + +/* + * z32_float conversion functions + */ + +static INLINE uint16_t +z32_float_to_z16_unorm(float z) +{ + const float scale = 0xffff; + return (uint16_t)(z * scale + 0.5f); +} + +static INLINE float +z16_unorm_to_z32_float(uint16_t z) +{ + const float scale = 1.0 / 0xffff; + return (float)(z * scale); +} + +static INLINE uint32_t +z32_float_to_z24_unorm(float z) +{ + const double scale = 0xffffff; + return (uint32_t)(z * scale) & 0xffffff; +} + +static INLINE float +z24_unorm_to_z32_float(uint32_t z) +{ + const double scale = 1.0 / 0xffffff; + return (float)(z * scale); +} + +static INLINE uint32_t +z32_float_to_z32_unorm(float z) +{ + const double scale = 0xffffffff; + return (uint32_t)(z * scale); +} + +static INLINE float +z32_unorm_to_z32_float(uint32_t z) +{ + const double scale = 1.0 / 0xffffffff; + return (float)(z * scale); +} + + +void +util_format_s8_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z16_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; ++x) { + uint16_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = z16_unorm_to_z32_float(value); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z16_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; ++x) { + uint16_t value; + value = z32_float_to_z16_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z16_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; ++x) { + uint16_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = z16_unorm_to_z32_unorm(value); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z16_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; ++x) { + uint16_t value; + value = z32_unorm_to_z16_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z32_unorm_to_z32_float(value); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_float_to_z32_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const float *src = (const float *)src_row; + for(x = 0; x < width; ++x) { + *dst++ = z32_float_to_z32_unorm(*src++); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + float *dst = (float *)dst_row; + for(x = 0; x < width; ++x) { + *dst++ = z32_unorm_to_z32_float(*src++); + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24_unorm_s8_uint_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24_unorm_s8_uint_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0xff000000; + value |= z32_float_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24_unorm_s8_uint_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24_unorm_s8_uint_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value= *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0xff000000; + value |= z32_unorm_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24_unorm_s8_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value >> 24; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24_unorm_s8_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0x00ffffff; + value |= *src++ << 24; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_s8_uint_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uint_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0x000000ff; + value |= z32_float_to_z24_unorm(*src++) << 8; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_s8_uint_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uint_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0x000000ff; + value |= *src++ & 0xffffff00; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_s8_uint_z24_unorm_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value & 0xff; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uint_z24_unorm_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0xffffff00; + value |= *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24x8_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24x8_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_float_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24x8_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_unorm_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_x8z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_x8z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_float_to_z24_unorm(*src++) << 8; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_unorm_to_z24_unorm(*src++) << 8; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_float_s8x24_uint_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const float *src = (const float *)src_row; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 2; + dst += 1; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_s8x24_uint_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + float *dst = (float *)dst_row; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 1; + dst += 2; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_float_s8x24_uint_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const float *src = (const float *)src_row; + for(x = 0; x < width; ++x) { + *dst = z32_float_to_z32_unorm(*src); + src += 2; + dst += 1; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_s8x24_uint_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + float *dst = (float *)dst_row; + for(x = 0; x < width; ++x) { + *dst++ = z32_unorm_to_z32_float(*src++); + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_float_s8x24_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint8_t *dst = dst_row; + const uint8_t *src = src_row + 4; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 8; + dst += 1; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_s8x24_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row + 4; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 1; + dst += 8; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_x24s8_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uint_unpack_s_8uint(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x24s8_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uint_pack_s_8uint(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uint_z24_unorm_unpack_s_8uint(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uint_z24_unorm_pack_s_8uint(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x32_s8x24_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uint_unpack_s_8uint(dst_row, dst_stride, + src_row, src_stride, + width, height); + +} + +void +util_format_x32_s8x24_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uint_pack_s_8uint(dst_row, dst_stride, + src_row, src_stride, + width, height); +} diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c new file mode 100644 index 0000000..ae9e951 --- /dev/null +++ b/src/gallium/auxiliary/util/u_math.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "pipe/p_config.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" + +#if defined(PIPE_ARCH_SSE) +#include +/* This is defined in pmmintrin.h, but it can only be included when -msse3 is + * used, so just define it here to avoid further. */ +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#endif + + +/** 2^x, for x in [-1.0, 1.0) */ +float pow2_table[POW2_TABLE_SIZE]; + + +static void +init_pow2_table(void) +{ + int i; + for (i = 0; i < POW2_TABLE_SIZE; i++) + pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE); +} + + +/** log2(x), for x in [1.0, 2.0) */ +float log2_table[LOG2_TABLE_SIZE]; + + +static void +init_log2_table(void) +{ + unsigned i; + for (i = 0; i < LOG2_TABLE_SIZE; i++) + log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SCALE)); +} + + +/** + * One time init for math utilities. + */ +void +util_init_math(void) +{ + static boolean initialized = FALSE; + if (!initialized) { + init_pow2_table(); + init_log2_table(); + initialized = TRUE; + } +} + +/** + * Fetches the contents of the fpstate (mxcsr on x86) register. + * + * On platforms without support for it just returns 0. + */ +unsigned +util_fpstate_get(void) +{ + unsigned mxcsr = 0; + +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + mxcsr = _mm_getcsr(); + } +#endif + + return mxcsr; +} + +/** + * Make sure that the fp treats the denormalized floating + * point numbers as zero. + * + * This is the behavior required by D3D10. OpenGL doesn't care. + */ +unsigned +util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) +{ +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + /* Enable flush to zero mode */ + current_mxcsr |= _MM_FLUSH_ZERO_MASK; + if (util_cpu_caps.has_daz) { + /* Enable denormals are zero mode */ + current_mxcsr |= _MM_DENORMALS_ZERO_MASK; + } + util_fpstate_set(current_mxcsr); + } +#endif + return current_mxcsr; +} + +/** + * Set the state of the fpstate (mxcsr on x86) register. + * + * On platforms without support for it's a noop. + */ +void +util_fpstate_set(unsigned mxcsr) +{ +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + _mm_setcsr(mxcsr); + } +#endif +} diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index ffbcc4c..5577e62 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -112,10 +112,13 @@ static INLINE float logf( float f ) #define logf(x) ((float)log((double)(x))) #endif /* logf */ +#if _MSC_VER < 1800 #define isfinite(x) _finite((double)(x)) #define isnan(x) _isnan((double)(x)) +#endif /* _MSC_VER < 1800 */ #endif /* _MSC_VER < 1400 && !defined(__cplusplus) */ +#if _MSC_VER < 1800 static INLINE double log2( double x ) { const double invln2 = 1.442695041; @@ -133,34 +136,18 @@ roundf(float x) { return x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f); } +#endif +#ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) -#define NAN (INFINITY - INFINITY) - -#endif /* _MSC_VER */ - - -#ifdef PIPE_OS_ANDROID - -static INLINE -double log2(double d) -{ - return log(d) * (1.0 / M_LN2); -} - -/* workaround a conflict with main/imports.h */ -#ifdef log2f -#undef log2f #endif -static INLINE -float log2f(float f) -{ - return logf(f) * (float) (1.0 / M_LN2); -} - +#ifndef NAN +#define NAN (INFINITY - INFINITY) #endif +#endif /* _MSC_VER */ + #if __STDC_VERSION__ < 199901L && (!defined(__cplusplus) || defined(_MSC_VER)) static INLINE long int @@ -563,6 +550,22 @@ static INLINE unsigned util_last_bit(unsigned u) #endif } +/** + * Find last bit in a word that does not match the sign bit. The least + * significant bit is 1. + * Return 0 if no bits are set. + */ +static INLINE unsigned util_last_bit_signed(int i) +{ +#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 407) + return 31 - __builtin_clrsb(i); +#else + if (i >= 0) + return util_last_bit(i); + else + return util_last_bit(~(unsigned)i); +#endif +} /* Destructively loop over all of the bits in a mask as in: * @@ -711,6 +714,21 @@ util_bitcount(unsigned n) #endif } +/** + * Reverse bits in n + * Algorithm taken from: + * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer + */ +static INLINE unsigned +util_bitreverse(unsigned n) +{ + n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1); + n = ((n >> 2) & 0x33333333u) | ((n & 0x33333333u) << 2); + n = ((n >> 4) & 0x0f0f0f0fu) | ((n & 0x0f0f0f0fu) << 4); + n = ((n >> 8) & 0x00ff00ffu) | ((n & 0x00ff00ffu) << 8); + n = ((n >> 16) & 0xffffu) | ((n & 0xffffu) << 16); + return n; +} /** * Convert from little endian to CPU byte order. diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 166c68b..e0c9018 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -51,7 +51,7 @@ union util_color { ubyte ub; ushort us; - uint ui; + uint ui[4]; ushort h[4]; /* half float */ float f[4]; double d[4]; @@ -67,32 +67,32 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, switch (format) { case PIPE_FORMAT_ABGR8888_UNORM: { - uc->ui = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_XBGR8888_UNORM: { - uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_BGRA8888_UNORM: { - uc->ui = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui[0] = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_BGRX8888_UNORM: { - uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui[0] = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_ARGB8888_UNORM: { - uc->ui = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_XRGB8888_UNORM: { - uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_B5G6R5_UNORM: @@ -168,7 +168,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, switch (format) { case PIPE_FORMAT_ABGR8888_UNORM: { - uint p = uc->ui; + uint p = uc->ui[0]; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -177,7 +177,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, return; case PIPE_FORMAT_XBGR8888_UNORM: { - uint p = uc->ui; + uint p = uc->ui[0]; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -186,7 +186,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, return; case PIPE_FORMAT_BGRA8888_UNORM: { - uint p = uc->ui; + uint p = uc->ui[0]; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -195,7 +195,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, return; case PIPE_FORMAT_BGRX8888_UNORM: { - uint p = uc->ui; + uint p = uc->ui[0]; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -204,7 +204,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, return; case PIPE_FORMAT_ARGB8888_UNORM: { - uint p = uc->ui; + uint p = uc->ui[0]; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -213,7 +213,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, return; case PIPE_FORMAT_XRGB8888_UNORM: { - uint p = uc->ui; + uint p = uc->ui[0]; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -352,32 +352,32 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * switch (format) { case PIPE_FORMAT_ABGR8888_UNORM: { - uc->ui = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_XBGR8888_UNORM: { - uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_BGRA8888_UNORM: { - uc->ui = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui[0] = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_BGRX8888_UNORM: { - uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui[0] = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_ARGB8888_UNORM: { - uc->ui = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_XRGB8888_UNORM: { - uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_B5G6R5_UNORM: diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h new file mode 100644 index 0000000..cf29dff --- /dev/null +++ b/src/gallium/auxiliary/util/u_rect.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_RECT_H +#define U_RECT_H + +#include "pipe/p_compiler.h" +#include "util/u_math.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct u_rect { + int x0, x1; + int y0, y1; +}; + +/* Do two rectangles intersect? + */ +static INLINE boolean +u_rect_test_intersection(const struct u_rect *a, + const struct u_rect *b) +{ + return (!(a->x1 < b->x0 || + b->x1 < a->x0 || + a->y1 < b->y0 || + b->y1 < a->y0)); +} + +/* Find the intersection of two rectangles known to intersect. + */ +static INLINE void +u_rect_find_intersection(const struct u_rect *a, + struct u_rect *b) +{ + /* Caller should verify intersection exists before calling. + */ + if (b->x0 < a->x0) b->x0 = a->x0; + if (b->x1 > a->x1) b->x1 = a->x1; + if (b->y0 < a->y0) b->y0 = a->y0; + if (b->y1 > a->y1) b->y1 = a->y1; +} + + +static INLINE int +u_rect_area(const struct u_rect *r) +{ + return (r->x1 - r->x0) * (r->y1 - r->y0); +} + +static INLINE void +u_rect_possible_intersection(const struct u_rect *a, + struct u_rect *b) +{ + if (u_rect_test_intersection(a,b)) { + u_rect_find_intersection(a,b); + } + else { + b->x0 = b->x1 = b->y0 = b->y1 = 0; + } +} + +/* Set @d to a rectangle that covers both @a and @b. + */ +static INLINE void +u_rect_union(struct u_rect *d, const struct u_rect *a, const struct u_rect *b) +{ + d->x0 = MIN2(a->x0, b->x0); + d->y0 = MIN2(a->y0, b->y0); + d->x1 = MAX2(a->x1, b->x1); + d->y1 = MAX2(a->y1, b->y1); +} + +#ifdef __cplusplus +} +#endif + +#endif /* U_RECT_H */ diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c new file mode 100644 index 0000000..654b5bb --- /dev/null +++ b/src/gallium/auxiliary/util/u_surface.c @@ -0,0 +1,684 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Surface utility functions. + * + * @author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_rect.h" +#include "util/u_surface.h" +#include "util/u_pack_color.h" + + +/** + * Initialize a pipe_surface object. 'view' is considered to have + * uninitialized contents. + */ +void +u_surface_default_template(struct pipe_surface *surf, + const struct pipe_resource *texture) +{ + memset(surf, 0, sizeof(*surf)); + + surf->format = texture->format; +} + + +/** + * Copy 2D rect from one place to another. + * Position and sizes are in pixels. + * src_stride may be negative to do vertical flip of pixels from source. + */ +void +util_copy_rect(ubyte * dst, + enum pipe_format format, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + const ubyte * src, + int src_stride, + unsigned src_x, + unsigned src_y) +{ + unsigned i; + int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; + int blocksize = util_format_get_blocksize(format); + int blockwidth = util_format_get_blockwidth(format); + int blockheight = util_format_get_blockheight(format); + + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); + + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; + src_x /= blockwidth; + src_y /= blockheight; + + dst += dst_x * blocksize; + src += src_x * blocksize; + dst += dst_y * dst_stride; + src += src_y * src_stride_pos; + width *= blocksize; + + if (width == dst_stride && width == src_stride) + memcpy(dst, src, height * width); + else { + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + dst += dst_stride; + src += src_stride; + } + } +} + + +/** + * Copy 3D box from one place to another. + * Position and sizes are in pixels. + */ +void +util_copy_box(ubyte * dst, + enum pipe_format format, + unsigned dst_stride, unsigned dst_slice_stride, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + unsigned width, unsigned height, unsigned depth, + const ubyte * src, + int src_stride, unsigned src_slice_stride, + unsigned src_x, unsigned src_y, unsigned src_z) +{ + unsigned z; + dst += dst_z * dst_slice_stride; + src += src_z * src_slice_stride; + for (z = 0; z < depth; ++z) { + util_copy_rect(dst, + format, + dst_stride, + dst_x, dst_y, + width, height, + src, + src_stride, + src_x, src_y); + + dst += dst_slice_stride; + src += src_slice_stride; + } +} + + +void +util_fill_rect(ubyte * dst, + enum pipe_format format, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + union util_color *uc) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i, j; + unsigned width_size; + int blocksize = desc->block.bits / 8; + int blockwidth = desc->block.width; + int blockheight = desc->block.height; + + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); + + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; + + dst += dst_x * blocksize; + dst += dst_y * dst_stride; + width_size = width * blocksize; + + switch (blocksize) { + case 1: + if(dst_stride == width_size) + memset(dst, uc->ub, height * width_size); + else { + for (i = 0; i < height; i++) { + memset(dst, uc->ub, width_size); + dst += dst_stride; + } + } + break; + case 2: + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = uc->us; + dst += dst_stride; + } + break; + case 4: + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = uc->ui[0]; + dst += dst_stride; + } + break; + default: + for (i = 0; i < height; i++) { + ubyte *row = dst; + for (j = 0; j < width; j++) { + memcpy(row, uc, blocksize); + row += blocksize; + } + dst += dst_stride; + } + break; + } +} + + +void +util_fill_box(ubyte * dst, + enum pipe_format format, + unsigned stride, + unsigned layer_stride, + unsigned x, + unsigned y, + unsigned z, + unsigned width, + unsigned height, + unsigned depth, + union util_color *uc) +{ + unsigned layer; + dst += z * layer_stride; + for (layer = z; layer < depth; layer++) { + util_fill_rect(dst, format, + stride, + x, y, width, height, uc); + dst += layer_stride; + } +} + + +/** + * Fallback function for pipe->resource_copy_region(). + * Note: (X,Y)=(0,0) is always the upper-left corner. + */ +void +util_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct pipe_transfer *src_trans, *dst_trans; + uint8_t *dst_map; + const uint8_t *src_map; + enum pipe_format src_format, dst_format; + struct pipe_box dst_box; + + assert(src && dst); + if (!src || !dst) + return; + + assert((src->target == PIPE_BUFFER && dst->target == PIPE_BUFFER) || + (src->target != PIPE_BUFFER && dst->target != PIPE_BUFFER)); + + src_format = src->format; + dst_format = dst->format; + + assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format)); + assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); + assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); + + src_map = pipe->transfer_map(pipe, + src, + src_level, + PIPE_TRANSFER_READ, + src_box, &src_trans); + assert(src_map); + if (!src_map) { + goto no_src_map; + } + + dst_box.x = dst_x; + dst_box.y = dst_y; + dst_box.z = dst_z; + dst_box.width = src_box->width; + dst_box.height = src_box->height; + dst_box.depth = src_box->depth; + + dst_map = pipe->transfer_map(pipe, + dst, + dst_level, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, + &dst_box, &dst_trans); + assert(dst_map); + if (!dst_map) { + goto no_dst_map; + } + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + assert(src_box->height == 1); + assert(src_box->depth == 1); + memcpy(dst_map, src_map, src_box->width); + } else { + util_copy_box(dst_map, + dst_format, + dst_trans->stride, dst_trans->layer_stride, + 0, 0, 0, + src_box->width, src_box->height, src_box->depth, + src_map, + src_trans->stride, src_trans->layer_stride, + 0, 0, 0); + } + + pipe->transfer_unmap(pipe, dst_trans); +no_dst_map: + pipe->transfer_unmap(pipe, src_trans); +no_src_map: + ; +} + + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fallback for pipe->clear_render_target() function. + * XXX this looks too hackish to be really useful. + * cpp > 4 looks like a gross hack at best... + * Plus can't use these transfer fallbacks when clearing + * multisampled surfaces for instance. + * Clears all bound layers. + */ +void +util_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct pipe_transfer *dst_trans; + ubyte *dst_map; + union util_color uc; + unsigned max_layer; + + assert(dst->texture); + if (!dst->texture) + return; + + if (dst->texture->target == PIPE_BUFFER) { + /* + * The fill naturally works on the surface format, however + * the transfer uses resource format which is just bytes for buffers. + */ + unsigned dx, w; + unsigned pixstride = util_format_get_blocksize(dst->format); + dx = (dst->u.buf.first_element + dstx) * pixstride; + w = width * pixstride; + max_layer = 0; + dst_map = pipe_transfer_map(pipe, + dst->texture, + 0, 0, + PIPE_TRANSFER_WRITE, + dx, 0, w, 1, + &dst_trans); + } + else { + max_layer = dst->u.tex.last_layer - dst->u.tex.first_layer; + dst_map = pipe_transfer_map_3d(pipe, + dst->texture, + dst->u.tex.level, + PIPE_TRANSFER_WRITE, + dstx, dsty, dst->u.tex.first_layer, + width, height, max_layer + 1, &dst_trans); + } + + assert(dst_map); + + if (dst_map) { + enum pipe_format format = dst->format; + assert(dst_trans->stride > 0); + + if (util_format_is_pure_integer(format)) { + /* + * We expect int/uint clear values here, though some APIs + * might disagree (but in any case util_pack_color() + * couldn't handle it)... + */ + if (util_format_is_pure_sint(format)) { + util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1); + } + else { + assert(util_format_is_pure_uint(format)); + util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1); + } + } + else { + util_pack_color(color->f, dst->format, &uc); + } + + util_fill_box(dst_map, dst->format, + dst_trans->stride, dst_trans->layer_stride, + 0, 0, 0, width, height, max_layer + 1, &uc); + + pipe->transfer_unmap(pipe, dst_trans); + } +} + +/** + * Fallback for pipe->clear_stencil() function. + * sw fallback doesn't look terribly useful here. + * Plus can't use these transfer fallbacks when clearing + * multisampled surfaces for instance. + * Clears all bound layers. + */ +void +util_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + enum pipe_format format = dst->format; + struct pipe_transfer *dst_trans; + ubyte *dst_map; + boolean need_rmw = FALSE; + unsigned max_layer, layer; + + if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) && + ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + util_format_is_depth_and_stencil(format)) + need_rmw = TRUE; + + assert(dst->texture); + if (!dst->texture) + return; + + max_layer = dst->u.tex.last_layer - dst->u.tex.first_layer; + dst_map = pipe_transfer_map_3d(pipe, + dst->texture, + dst->u.tex.level, + (need_rmw ? PIPE_TRANSFER_READ_WRITE : + PIPE_TRANSFER_WRITE), + dstx, dsty, dst->u.tex.first_layer, + width, height, max_layer + 1, &dst_trans); + assert(dst_map); + + if (dst_map) { + unsigned dst_stride = dst_trans->stride; + uint64_t zstencil = util_pack64_z_stencil(format, depth, stencil); + ubyte *dst_layer = dst_map; + unsigned i, j; + assert(dst_trans->stride > 0); + + for (layer = 0; layer <= max_layer; layer++) { + dst_map = dst_layer; + + switch (util_format_get_blocksize(format)) { + case 1: + assert(format == PIPE_FORMAT_S8_UINT); + if(dst_stride == width) + memset(dst_map, (uint8_t) zstencil, height * width); + else { + for (i = 0; i < height; i++) { + memset(dst_map, (uint8_t) zstencil, width); + dst_map += dst_stride; + } + } + break; + case 2: + assert(format == PIPE_FORMAT_Z16_UNORM); + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = (uint16_t) zstencil; + dst_map += dst_stride; + } + break; + case 4: + if (!need_rmw) { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = (uint32_t) zstencil; + dst_map += dst_stride; + } + } + else { + uint32_t dst_mask; + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) + dst_mask = 0x00ffffff; + else { + assert(format == PIPE_FORMAT_S8_UINT_Z24_UNORM); + dst_mask = 0xffffff00; + } + if (clear_flags & PIPE_CLEAR_DEPTH) + dst_mask = ~dst_mask; + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst_map; + for (j = 0; j < width; j++) { + uint32_t tmp = *row & dst_mask; + *row++ = tmp | ((uint32_t) zstencil & ~dst_mask); + } + dst_map += dst_stride; + } + } + break; + case 8: + if (!need_rmw) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = zstencil; + dst_map += dst_stride; + } + } + else { + uint64_t src_mask; + + if (clear_flags & PIPE_CLEAR_DEPTH) + src_mask = 0x00000000ffffffffull; + else + src_mask = 0x000000ff00000000ull; + + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) { + uint64_t tmp = *row & ~src_mask; + *row++ = tmp | (zstencil & src_mask); + } + dst_map += dst_stride; + } + } + break; + default: + assert(0); + break; + } + dst_layer += dst_trans->layer_stride; + } + + pipe->transfer_unmap(pipe, dst_trans); + } +} + + +/* Return if the box is totally inside the resource. + */ +static boolean +is_box_inside_resource(const struct pipe_resource *res, + const struct pipe_box *box, + unsigned level) +{ + unsigned width = 1, height = 1, depth = 1; + + switch (res->target) { + case PIPE_BUFFER: + width = res->width0; + height = 1; + depth = 1; + break; + case PIPE_TEXTURE_1D: + width = u_minify(res->width0, level); + height = 1; + depth = 1; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + width = u_minify(res->width0, level); + height = u_minify(res->height0, level); + depth = 1; + break; + case PIPE_TEXTURE_3D: + width = u_minify(res->width0, level); + height = u_minify(res->height0, level); + depth = u_minify(res->depth0, level); + break; + case PIPE_TEXTURE_CUBE: + width = u_minify(res->width0, level); + height = u_minify(res->height0, level); + depth = 6; + break; + case PIPE_TEXTURE_1D_ARRAY: + width = u_minify(res->width0, level); + height = 1; + depth = res->array_size; + break; + case PIPE_TEXTURE_2D_ARRAY: + width = u_minify(res->width0, level); + height = u_minify(res->height0, level); + depth = res->array_size; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + width = u_minify(res->width0, level); + height = u_minify(res->height0, level); + depth = res->array_size; + assert(res->array_size % 6 == 0); + break; + case PIPE_MAX_TEXTURE_TYPES:; + } + + return box->x >= 0 && + box->x + box->width <= (int) width && + box->y >= 0 && + box->y + box->height <= (int) height && + box->z >= 0 && + box->z + box->depth <= (int) depth; +} + +static unsigned +get_sample_count(const struct pipe_resource *res) +{ + return res->nr_samples ? res->nr_samples : 1; +} + +/** + * Try to do a blit using resource_copy_region. The function calls + * resource_copy_region if the blit description is compatible with it. + * + * It returns TRUE if the blit was done using resource_copy_region. + * + * It returns FALSE otherwise and the caller must fall back to a more generic + * codepath for the blit operation. (e.g. by using u_blitter) + */ +boolean +util_try_blit_via_copy_region(struct pipe_context *ctx, + const struct pipe_blit_info *blit) +{ + unsigned mask = util_format_get_mask(blit->dst.format); + + /* No format conversions. */ + if (blit->src.resource->format != blit->src.format || + blit->dst.resource->format != blit->dst.format || + !util_is_format_compatible( + util_format_description(blit->src.resource->format), + util_format_description(blit->dst.resource->format))) { + return FALSE; + } + + /* No masks, no filtering, no scissor. */ + if ((blit->mask & mask) != mask || + blit->filter != PIPE_TEX_FILTER_NEAREST || + blit->scissor_enable) { + return FALSE; + } + + /* No flipping. */ + if (blit->src.box.width < 0 || + blit->src.box.height < 0 || + blit->src.box.depth < 0) { + return FALSE; + } + + /* No scaling. */ + if (blit->src.box.width != blit->dst.box.width || + blit->src.box.height != blit->dst.box.height || + blit->src.box.depth != blit->dst.box.depth) { + return FALSE; + } + + /* No out-of-bounds access. */ + if (!is_box_inside_resource(blit->src.resource, &blit->src.box, + blit->src.level) || + !is_box_inside_resource(blit->dst.resource, &blit->dst.box, + blit->dst.level)) { + return FALSE; + } + + /* Sample counts must match. */ + if (get_sample_count(blit->src.resource) != + get_sample_count(blit->dst.resource)) { + return FALSE; + } + + ctx->resource_copy_region(ctx, blit->dst.resource, blit->dst.level, + blit->dst.box.x, blit->dst.box.y, blit->dst.box.z, + blit->src.resource, blit->src.level, + &blit->src.box); + return TRUE; +} diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a220de0..fd32c4a 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -350,6 +350,7 @@ enum pipe_flush_flags { #define PIPE_BIND_GLOBAL (1 << 18) /* set_global_binding */ #define PIPE_BIND_SHADER_RESOURCE (1 << 19) /* set_shader_resources */ #define PIPE_BIND_COMPUTE_RESOURCE (1 << 20) /* set_compute_resources */ +#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 21) /* pipe_draw_info.indirect */ /* The first two flags above were previously part of the amorphous * TEXTURE_USAGE, most of which are now descriptions of the ways a @@ -546,12 +547,33 @@ enum pipe_cap { PIPE_CAP_MAX_VIEWPORTS = 84, PIPE_CAP_ENDIANNESS = 85, PIPE_CAP_MIXED_FRAMEBUFFER_SIZES = 86, - PIPE_CAP_TGSI_VS_LAYER = 87, + PIPE_CAP_TGSI_VS_LAYER_VIEWPORT = 87, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES = 88, PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS = 89, PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 90, PIPE_CAP_TEXTURE_GATHER_SM5 = 91, - PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT = 92 + PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT = 92, + PIPE_CAP_FAKE_SW_MSAA = 93, + PIPE_CAP_TEXTURE_QUERY_LOD = 94, + PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET = 95, + PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET = 96, + PIPE_CAP_SAMPLE_SHADING = 97, + PIPE_CAP_TEXTURE_GATHER_OFFSETS = 98, + PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION = 99, + PIPE_CAP_MAX_VERTEX_STREAMS = 100, + PIPE_CAP_DRAW_INDIRECT = 101, + PIPE_CAP_TGSI_FS_FINE_DERIVATIVE = 102, + PIPE_CAP_VENDOR_ID = 103, + PIPE_CAP_DEVICE_ID = 104, + PIPE_CAP_ACCELERATED = 105, + PIPE_CAP_VIDEO_MEMORY = 106, + PIPE_CAP_UMA = 107, + PIPE_CAP_CONDITIONAL_RENDER_INVERTED = 108, + PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE = 109, + PIPE_CAP_SAMPLER_VIEW_TARGET = 110, + PIPE_CAP_CLIP_HALFZ = 111, + PIPE_CAP_VERTEXID_NOBASE = 112, + PIPE_CAP_POLYGON_OFFSET_CLAMP = 113, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) @@ -573,44 +595,45 @@ enum pipe_endian { */ enum pipe_capf { - PIPE_CAPF_MAX_LINE_WIDTH = 15, - PIPE_CAPF_MAX_LINE_WIDTH_AA = 16, - PIPE_CAPF_MAX_POINT_WIDTH = 17, - PIPE_CAPF_MAX_POINT_WIDTH_AA = 18, - PIPE_CAPF_MAX_TEXTURE_ANISOTROPY = 19, - PIPE_CAPF_MAX_TEXTURE_LOD_BIAS = 20, - PIPE_CAPF_GUARD_BAND_LEFT = 21, - PIPE_CAPF_GUARD_BAND_TOP = 22, - PIPE_CAPF_GUARD_BAND_RIGHT = 23, - PIPE_CAPF_GUARD_BAND_BOTTOM = 24 + PIPE_CAPF_MAX_LINE_WIDTH, + PIPE_CAPF_MAX_LINE_WIDTH_AA, + PIPE_CAPF_MAX_POINT_WIDTH, + PIPE_CAPF_MAX_POINT_WIDTH_AA, + PIPE_CAPF_MAX_TEXTURE_ANISOTROPY, + PIPE_CAPF_MAX_TEXTURE_LOD_BIAS, + PIPE_CAPF_GUARD_BAND_LEFT, + PIPE_CAPF_GUARD_BAND_TOP, + PIPE_CAPF_GUARD_BAND_RIGHT, + PIPE_CAPF_GUARD_BAND_BOTTOM }; /* Shader caps not specific to any single stage */ enum pipe_shader_cap { - PIPE_SHADER_CAP_MAX_INSTRUCTIONS = 0, /* if 0, it means the stage is unsupported */ - PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS = 1, - PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS = 2, - PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS = 3, - PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH = 4, - PIPE_SHADER_CAP_MAX_INPUTS = 5, - PIPE_SHADER_CAP_MAX_CONSTS = 6, - PIPE_SHADER_CAP_MAX_CONST_BUFFERS = 7, - PIPE_SHADER_CAP_MAX_TEMPS = 8, - PIPE_SHADER_CAP_MAX_ADDRS = 9, - PIPE_SHADER_CAP_MAX_PREDS = 10, + PIPE_SHADER_CAP_MAX_INSTRUCTIONS, /* if 0, it means the stage is unsupported */ + PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS, + PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS, + PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS, + PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH, + PIPE_SHADER_CAP_MAX_INPUTS, + PIPE_SHADER_CAP_MAX_OUTPUTS, + PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE, + PIPE_SHADER_CAP_MAX_CONST_BUFFERS, + PIPE_SHADER_CAP_MAX_TEMPS, + PIPE_SHADER_CAP_MAX_PREDS, /* boolean caps */ - PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 11, - PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR = 12, - PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR = 13, - PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14, - PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, - PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ - PIPE_SHADER_CAP_INTEGERS = 17, - PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18, - PIPE_SHADER_CAP_PREFERRED_IR = 19, - PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED = 20, - PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS = 21 + PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED, + PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR, + PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR, + PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR, + PIPE_SHADER_CAP_INDIRECT_CONST_ADDR, + PIPE_SHADER_CAP_SUBROUTINES, /* BGNSUB, ENDSUB, CAL, RET */ + PIPE_SHADER_CAP_INTEGERS, + PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS, + PIPE_SHADER_CAP_PREFERRED_IR, + PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED, + PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS, + PIPE_SHADER_CAP_DOUBLES }; /** @@ -619,7 +642,8 @@ enum pipe_shader_cap enum pipe_shader_ir { PIPE_SHADER_IR_TGSI, - PIPE_SHADER_IR_LLVM + PIPE_SHADER_IR_LLVM, + PIPE_SHADER_IR_NATIVE }; /** @@ -637,7 +661,10 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE, PIPE_COMPUTE_CAP_MAX_INPUT_SIZE, - PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, + PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY, + PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS, + PIPE_COMPUTE_CAP_IMAGES_SUPPORTED }; /** diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index daa3be2..3f505f2 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -35,23 +35,9 @@ extern "C" { #include "p_config.h" -enum pipe_type { - PIPE_TYPE_UNORM = 0, - PIPE_TYPE_SNORM, - PIPE_TYPE_SINT, - PIPE_TYPE_UINT, - PIPE_TYPE_FLOAT, - PIPE_TYPE_COUNT -}; - /** - * Texture/surface image formats (preliminary) + * Formats for textures, surfaces and vertex data */ - -/* KW: Added lots of surface formats to support vertex element layout - * definitions, and eventually render-to-vertex-buffer. - */ - enum pipe_format { PIPE_FORMAT_NONE = 0, PIPE_FORMAT_B8G8R8A8_UNORM = 1, @@ -236,8 +222,8 @@ enum pipe_format { PIPE_FORMAT_NV12 = 166, PIPE_FORMAT_NV21 = 167, - PIPE_FORMAT_R4A4_UNORM = 168, - PIPE_FORMAT_A4R4_UNORM = 169, + PIPE_FORMAT_A4R4_UNORM = 168, + PIPE_FORMAT_R4A4_UNORM = 169, PIPE_FORMAT_R8A8_UNORM = 170, PIPE_FORMAT_A8R8_UNORM = 171, @@ -342,6 +328,26 @@ enum pipe_format { PIPE_FORMAT_R32A32_SINT = 252, PIPE_FORMAT_R10G10B10A2_UINT = 253, + PIPE_FORMAT_B5G6R5_SRGB = 254, + + PIPE_FORMAT_BPTC_RGBA_UNORM = 255, + PIPE_FORMAT_BPTC_SRGBA = 256, + PIPE_FORMAT_BPTC_RGB_FLOAT = 257, + PIPE_FORMAT_BPTC_RGB_UFLOAT = 258, + + PIPE_FORMAT_A8L8_UNORM = 259, + PIPE_FORMAT_A8L8_SNORM = 260, + PIPE_FORMAT_A8L8_SRGB = 261, + PIPE_FORMAT_A16L16_UNORM = 262, + + PIPE_FORMAT_G8R8_UNORM = 263, + PIPE_FORMAT_G8R8_SNORM = 264, + PIPE_FORMAT_G16R16_UNORM = 265, + PIPE_FORMAT_G16R16_SNORM = 266, + + PIPE_FORMAT_A8B8G8R8_SNORM = 267, + PIPE_FORMAT_X8B8G8R8_SNORM = 268, + PIPE_FORMAT_COUNT }; @@ -354,6 +360,34 @@ enum pipe_format { #define PIPE_FORMAT_XRGB8888_UNORM PIPE_FORMAT_X8R8G8B8_UNORM #define PIPE_FORMAT_ABGR8888_UNORM PIPE_FORMAT_A8B8G8R8_UNORM #define PIPE_FORMAT_XBGR8888_UNORM PIPE_FORMAT_X8B8G8R8_UNORM +#define PIPE_FORMAT_RGBA8888_SNORM PIPE_FORMAT_R8G8B8A8_SNORM +#define PIPE_FORMAT_RGBX8888_SNORM PIPE_FORMAT_R8G8B8X8_SNORM +#define PIPE_FORMAT_ABGR8888_SNORM PIPE_FORMAT_A8B8G8R8_SNORM +#define PIPE_FORMAT_XBGR8888_SNORM PIPE_FORMAT_X8B8G8R8_SNORM +#define PIPE_FORMAT_RGBA8888_SRGB PIPE_FORMAT_R8G8B8A8_SRGB +#define PIPE_FORMAT_RGBX8888_SRGB PIPE_FORMAT_R8G8B8X8_SRGB +#define PIPE_FORMAT_BGRA8888_SRGB PIPE_FORMAT_B8G8R8A8_SRGB +#define PIPE_FORMAT_BGRX8888_SRGB PIPE_FORMAT_B8G8R8X8_SRGB +#define PIPE_FORMAT_ARGB8888_SRGB PIPE_FORMAT_A8R8G8B8_SRGB +#define PIPE_FORMAT_XRGB8888_SRGB PIPE_FORMAT_X8R8G8B8_SRGB +#define PIPE_FORMAT_ABGR8888_SRGB PIPE_FORMAT_A8B8G8R8_SRGB +#define PIPE_FORMAT_XBGR8888_SRGB PIPE_FORMAT_X8B8G8R8_SRGB +#define PIPE_FORMAT_LA88_UNORM PIPE_FORMAT_L8A8_UNORM +#define PIPE_FORMAT_AL88_UNORM PIPE_FORMAT_A8L8_UNORM +#define PIPE_FORMAT_LA88_SNORM PIPE_FORMAT_L8A8_SNORM +#define PIPE_FORMAT_AL88_SNORM PIPE_FORMAT_A8L8_SNORM +#define PIPE_FORMAT_LA88_SRGB PIPE_FORMAT_L8A8_SRGB +#define PIPE_FORMAT_AL88_SRGB PIPE_FORMAT_A8L8_SRGB +#define PIPE_FORMAT_LA1616_UNORM PIPE_FORMAT_L16A16_UNORM +#define PIPE_FORMAT_AL1616_UNORM PIPE_FORMAT_A16L16_UNORM +#define PIPE_FORMAT_RG88_UNORM PIPE_FORMAT_R8G8_UNORM +#define PIPE_FORMAT_GR88_UNORM PIPE_FORMAT_G8R8_UNORM +#define PIPE_FORMAT_RG88_SNORM PIPE_FORMAT_R8G8_SNORM +#define PIPE_FORMAT_GR88_SNORM PIPE_FORMAT_G8R8_SNORM +#define PIPE_FORMAT_RG1616_UNORM PIPE_FORMAT_R16G16_UNORM +#define PIPE_FORMAT_GR1616_UNORM PIPE_FORMAT_G16R16_UNORM +#define PIPE_FORMAT_RG1616_SNORM PIPE_FORMAT_R16G16_SNORM +#define PIPE_FORMAT_GR1616_SNORM PIPE_FORMAT_G16R16_SNORM #elif defined(PIPE_ARCH_BIG_ENDIAN) #define PIPE_FORMAT_ABGR8888_UNORM PIPE_FORMAT_R8G8B8A8_UNORM #define PIPE_FORMAT_XBGR8888_UNORM PIPE_FORMAT_R8G8B8X8_UNORM @@ -364,6 +398,34 @@ enum pipe_format { #define PIPE_FORMAT_BGRX8888_UNORM PIPE_FORMAT_X8R8G8B8_UNORM #define PIPE_FORMAT_RGBA8888_UNORM PIPE_FORMAT_A8B8G8R8_UNORM #define PIPE_FORMAT_RGBX8888_UNORM PIPE_FORMAT_X8B8G8R8_UNORM +#define PIPE_FORMAT_ABGR8888_SNORM PIPE_FORMAT_R8G8B8A8_SNORM +#define PIPE_FORMAT_XBGR8888_SNORM PIPE_FORMAT_R8G8B8X8_SNORM +#define PIPE_FORMAT_RGBA8888_SNORM PIPE_FORMAT_A8B8G8R8_SNORM +#define PIPE_FORMAT_RGBX8888_SNORM PIPE_FORMAT_X8B8G8R8_SNORM +#define PIPE_FORMAT_ABGR8888_SRGB PIPE_FORMAT_R8G8B8A8_SRGB +#define PIPE_FORMAT_XBGR8888_SRGB PIPE_FORMAT_R8G8B8X8_SRGB +#define PIPE_FORMAT_ARGB8888_SRGB PIPE_FORMAT_B8G8R8A8_SRGB +#define PIPE_FORMAT_XRGB8888_SRGB PIPE_FORMAT_B8G8R8X8_SRGB +#define PIPE_FORMAT_BGRA8888_SRGB PIPE_FORMAT_A8R8G8B8_SRGB +#define PIPE_FORMAT_BGRX8888_SRGB PIPE_FORMAT_X8R8G8B8_SRGB +#define PIPE_FORMAT_RGBA8888_SRGB PIPE_FORMAT_A8B8G8R8_SRGB +#define PIPE_FORMAT_RGBX8888_SRGB PIPE_FORMAT_X8B8G8R8_SRGB +#define PIPE_FORMAT_LA88_UNORM PIPE_FORMAT_A8L8_UNORM +#define PIPE_FORMAT_AL88_UNORM PIPE_FORMAT_L8A8_UNORM +#define PIPE_FORMAT_LA88_SNORM PIPE_FORMAT_A8L8_SNORM +#define PIPE_FORMAT_AL88_SNORM PIPE_FORMAT_L8A8_SNORM +#define PIPE_FORMAT_LA88_SRGB PIPE_FORMAT_A8L8_SRGB +#define PIPE_FORMAT_AL88_SRGB PIPE_FORMAT_L8A8_SRGB +#define PIPE_FORMAT_LA1616_UNORM PIPE_FORMAT_A16L16_UNORM +#define PIPE_FORMAT_AL1616_UNORM PIPE_FORMAT_L16A16_UNORM +#define PIPE_FORMAT_RG88_UNORM PIPE_FORMAT_G8R8_UNORM +#define PIPE_FORMAT_GR88_UNORM PIPE_FORMAT_R8G8_UNORM +#define PIPE_FORMAT_RG88_SNORM PIPE_FORMAT_G8R8_SNORM +#define PIPE_FORMAT_GR88_SNORM PIPE_FORMAT_R8G8_SNORM +#define PIPE_FORMAT_RG1616_UNORM PIPE_FORMAT_G16R16_UNORM +#define PIPE_FORMAT_GR1616_UNORM PIPE_FORMAT_R16G16_UNORM +#define PIPE_FORMAT_RG1616_SNORM PIPE_FORMAT_G16R16_SNORM +#define PIPE_FORMAT_GR1616_SNORM PIPE_FORMAT_R16G16_SNORM #endif enum pipe_video_chroma_format diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 8fa6a0a..25c4aeb 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -103,6 +103,11 @@ enum tgsi_file_type { #define TGSI_INTERPOLATE_COLOR 3 /* special color case for smooth/flat */ #define TGSI_INTERPOLATE_COUNT 4 +#define TGSI_INTERPOLATE_LOC_CENTER 0 +#define TGSI_INTERPOLATE_LOC_CENTROID 1 +#define TGSI_INTERPOLATE_LOC_SAMPLE 2 +#define TGSI_INTERPOLATE_LOC_COUNT 3 + #define TGSI_CYLINDRICAL_WRAP_X (1 << 0) #define TGSI_CYLINDRICAL_WRAP_Y (1 << 1) #define TGSI_CYLINDRICAL_WRAP_Z (1 << 2) @@ -138,9 +143,9 @@ struct tgsi_declaration_dimension struct tgsi_declaration_interp { unsigned Interpolate : 4; /**< one of TGSI_INTERPOLATE_x */ - unsigned Centroid : 1; /**< centroid sampling? */ + unsigned Location : 2; /**< one of TGSI_INTERPOLATE_LOC_x */ unsigned CylindricalWrap:4; /**< TGSI_CYLINDRICAL_WRAP_x flags */ - unsigned Padding : 23; + unsigned Padding : 22; }; #define TGSI_SEMANTIC_POSITION 0 @@ -167,7 +172,13 @@ struct tgsi_declaration_interp #define TGSI_SEMANTIC_VIEWPORT_INDEX 21 /**< viewport index */ #define TGSI_SEMANTIC_LAYER 22 /**< layer (rendertarget index) */ #define TGSI_SEMANTIC_CULLDIST 23 -#define TGSI_SEMANTIC_COUNT 24 /**< number of semantic values */ +#define TGSI_SEMANTIC_SAMPLEID 24 +#define TGSI_SEMANTIC_SAMPLEPOS 25 +#define TGSI_SEMANTIC_SAMPLEMASK 26 +#define TGSI_SEMANTIC_INVOCATIONID 27 +#define TGSI_SEMANTIC_VERTEXID_NOBASE 28 +#define TGSI_SEMANTIC_BASEVERTEX 29 +#define TGSI_SEMANTIC_COUNT 30 /**< number of semantic values */ struct tgsi_declaration_semantic { @@ -183,12 +194,21 @@ struct tgsi_declaration_resource { unsigned Padding : 22; }; +enum tgsi_return_type { + TGSI_RETURN_TYPE_UNORM = 0, + TGSI_RETURN_TYPE_SNORM, + TGSI_RETURN_TYPE_SINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_COUNT +}; + struct tgsi_declaration_sampler_view { unsigned Resource : 8; /**< one of TGSI_TEXTURE_ */ - unsigned ReturnTypeX : 6; /**< one of enum pipe_type */ - unsigned ReturnTypeY : 6; /**< one of enum pipe_type */ - unsigned ReturnTypeZ : 6; /**< one of enum pipe_type */ - unsigned ReturnTypeW : 6; /**< one of enum pipe_type */ + unsigned ReturnTypeX : 6; /**< one of enum tgsi_return_type */ + unsigned ReturnTypeY : 6; /**< one of enum tgsi_return_type */ + unsigned ReturnTypeZ : 6; /**< one of enum tgsi_return_type */ + unsigned ReturnTypeW : 6; /**< one of enum tgsi_return_type */ }; struct tgsi_declaration_array { @@ -208,6 +228,7 @@ struct tgsi_declaration_array { #define TGSI_IMM_FLOAT32 0 #define TGSI_IMM_UINT32 1 #define TGSI_IMM_INT32 2 +#define TGSI_IMM_FLOAT64 3 struct tgsi_immediate { @@ -232,7 +253,9 @@ union tgsi_immediate_data #define TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS 5 #define TGSI_PROPERTY_FS_DEPTH_LAYOUT 6 #define TGSI_PROPERTY_VS_PROHIBIT_UCPS 7 -#define TGSI_PROPERTY_COUNT 8 +#define TGSI_PROPERTY_GS_INVOCATIONS 8 +#define TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION 9 +#define TGSI_PROPERTY_COUNT 10 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ @@ -283,7 +306,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_MAD 16 #define TGSI_OPCODE_SUB 17 #define TGSI_OPCODE_LRP 18 -#define TGSI_OPCODE_CND 19 + /* gap */ #define TGSI_OPCODE_SQRT 20 #define TGSI_OPCODE_DP2A 21 /* gap */ @@ -297,7 +320,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_XPD 31 /* gap */ #define TGSI_OPCODE_ABS 33 -#define TGSI_OPCODE_RCC 34 + /* gap */ #define TGSI_OPCODE_DPH 35 #define TGSI_OPCODE_COS 36 #define TGSI_OPCODE_DDX 37 @@ -307,14 +330,14 @@ struct tgsi_property_data { #define TGSI_OPCODE_PK2US 41 #define TGSI_OPCODE_PK4B 42 #define TGSI_OPCODE_PK4UB 43 -#define TGSI_OPCODE_RFL 44 + /* gap */ #define TGSI_OPCODE_SEQ 45 -#define TGSI_OPCODE_SFL 46 + /* gap */ #define TGSI_OPCODE_SGT 47 #define TGSI_OPCODE_SIN 48 #define TGSI_OPCODE_SLE 49 #define TGSI_OPCODE_SNE 50 -#define TGSI_OPCODE_STR 51 + /* gap */ #define TGSI_OPCODE_TEX 52 #define TGSI_OPCODE_TXD 53 #define TGSI_OPCODE_TXP 54 @@ -322,17 +345,16 @@ struct tgsi_property_data { #define TGSI_OPCODE_UP2US 56 #define TGSI_OPCODE_UP4B 57 #define TGSI_OPCODE_UP4UB 58 -#define TGSI_OPCODE_X2D 59 -#define TGSI_OPCODE_ARA 60 + /* gap */ #define TGSI_OPCODE_ARR 61 -#define TGSI_OPCODE_BRA 62 + /* gap */ #define TGSI_OPCODE_CAL 63 #define TGSI_OPCODE_RET 64 #define TGSI_OPCODE_SSG 65 /* SGN */ #define TGSI_OPCODE_CMP 66 #define TGSI_OPCODE_SCS 67 #define TGSI_OPCODE_TXB 68 -#define TGSI_OPCODE_NRM 69 + /* gap */ #define TGSI_OPCODE_DIV 70 #define TGSI_OPCODE_DP2 71 #define TGSI_OPCODE_TXL 72 @@ -341,7 +363,10 @@ struct tgsi_property_data { #define TGSI_OPCODE_UIF 75 #define TGSI_OPCODE_ELSE 77 #define TGSI_OPCODE_ENDIF 78 - /* gap */ + +#define TGSI_OPCODE_DDX_FINE 79 +#define TGSI_OPCODE_DDY_FINE 80 + #define TGSI_OPCODE_PUSHA 81 #define TGSI_OPCODE_POPA 82 #define TGSI_OPCODE_CEIL 83 @@ -373,7 +398,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_FSLT 110 #define TGSI_OPCODE_FSNE 111 -#define TGSI_OPCODE_NRM4 112 + /* gap */ #define TGSI_OPCODE_CALLNZ 113 /* gap */ #define TGSI_OPCODE_BREAKC 115 @@ -455,7 +480,22 @@ struct tgsi_property_data { #define TGSI_OPCODE_TG4 182 -#define TGSI_OPCODE_LAST 183 +#define TGSI_OPCODE_LODQ 183 + +#define TGSI_OPCODE_IBFE 184 +#define TGSI_OPCODE_UBFE 185 +#define TGSI_OPCODE_BFI 186 +#define TGSI_OPCODE_BREV 187 +#define TGSI_OPCODE_POPC 188 +#define TGSI_OPCODE_LSB 189 +#define TGSI_OPCODE_IMSB 190 +#define TGSI_OPCODE_UMSB 191 + +#define TGSI_OPCODE_INTERP_CENTROID 192 +#define TGSI_OPCODE_INTERP_SAMPLE 193 +#define TGSI_OPCODE_INTERP_OFFSET 194 + +#define TGSI_OPCODE_LAST 195 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ diff --git a/src/vrend_shader.c b/src/vrend_shader.c index efaba7a..1b3c866 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -212,7 +212,6 @@ iter_declaration(struct tgsi_iterate_context *iter, ctx->inputs[i].name = decl->Semantic.Name; ctx->inputs[i].sid = decl->Semantic.Index; ctx->inputs[i].interpolate = decl->Interp.Interpolate; - ctx->inputs[i].centroid = decl->Interp.Centroid; ctx->inputs[i].first = decl->Range.First; ctx->inputs[i].glsl_predefined_no_emit = FALSE; ctx->inputs[i].glsl_no_index = FALSE; @@ -236,7 +235,6 @@ iter_declaration(struct tgsi_iterate_context *iter, ctx->inputs[j].name = TGSI_SEMANTIC_BCOLOR; ctx->inputs[j].sid = decl->Semantic.Index; ctx->inputs[j].interpolate = decl->Interp.Interpolate; - ctx->inputs[j].centroid = decl->Interp.Centroid; ctx->inputs[j].first = decl->Range.First; ctx->inputs[j].glsl_predefined_no_emit = FALSE; ctx->inputs[j].glsl_no_index = FALSE;