diff --git a/src/mesa/meson.build b/src/mesa/meson.build
index 0d1f57a..fd767a7 100644
--- a/src/mesa/meson.build
+++ b/src/mesa/meson.build
@@ -5,8 +5,10 @@ inc_mesa = include_directories('.', 'compat', 'pipe', 'util')
 files_mesa = files(
   'util/bitscan.c',
+  'util/hash_table.c',
   'util/os_file.c',
   'util/os_misc.c',
+  'util/ralloc.c',
   'util/u_cpu_detect.c',
   'util/u_debug.c',
   'util/u_math.c',
diff --git a/src/mesa/util/compiler.h b/src/mesa/util/compiler.h
new file mode 100644
index 0000000..da602cf
--- /dev/null
+++ b/src/mesa/util/compiler.h
@@ -0,0 +1,89 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file compiler.h
+ * Compiler-related stuff.
+ */
+
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+
+#include <assert.h>
+
+#include "util/macros.h"
+
+#include "c99_compat.h" /* inline, __func__, etc. */
+
+
+/**
+ * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32.
+ * Do not use these unless absolutely necessary!
+ * Try to use a runtime test instead.
+ * For now, only used by some DRI hardware drivers for color/texel packing.
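+ *
+ * Illustrative round-trip, as a sketch only (host_value is a placeholder
+ * variable name, not part of this header):
+ *
+ *   uint32_t le = CPU_TO_LE32(host_value);
+ *   assert(LE32_TO_CPU(le) == host_value);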
+ */
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
+#if defined(__linux__)
+#include <byteswap.h>
+#define CPU_TO_LE32( x ) bswap_32( x )
+#elif defined(__APPLE__)
+#include <CoreFoundation/CFByteOrder.h>
+#define CPU_TO_LE32( x ) CFSwapInt32HostToLittle( x )
+#elif defined(__OpenBSD__)
+#include <endian.h>
+#define CPU_TO_LE32( x ) htole32( x )
+#else /*__linux__ */
+#include <sys/endian.h>
+#define CPU_TO_LE32( x ) bswap32( x )
+#endif /*__linux__*/
+#define MESA_BIG_ENDIAN 1
+#else
+#define CPU_TO_LE32( x ) ( x )
+#define MESA_LITTLE_ENDIAN 1
+#endif
+#define LE32_TO_CPU( x ) CPU_TO_LE32( x )
+
+
+
+#define IEEE_ONE 0x3f800000
+
+#ifndef __has_attribute
+# define __has_attribute(x) 0
+#endif
+
+#if __cplusplus >= 201703L || __STDC_VERSION__ > 201710L
+/* Standard C++17/C23 attribute */
+#define FALLTHROUGH [[fallthrough]]
+#elif __has_attribute(fallthrough)
+/* Non-standard but supported by at least gcc and clang */
+#define FALLTHROUGH __attribute__((fallthrough))
+#else
+#define FALLTHROUGH do { } while(0)
+#endif
+
+#endif /* COMPILER_H */
diff --git a/src/mesa/util/fast_urem_by_const.h b/src/mesa/util/fast_urem_by_const.h
new file mode 100644
index 0000000..beb253d
--- /dev/null
+++ b/src/mesa/util/fast_urem_by_const.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2010 Valve Software
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <assert.h>
+
+/*
+ * Code for fast 32-bit unsigned remainder, based off of "Faster Remainder by
+ * Direct Computation: Applications to Compilers and Software Libraries,"
+ * available at https://arxiv.org/pdf/1902.01961.pdf.
+ *
+ * util_fast_urem32(n, d, REMAINDER_MAGIC(d)) returns the same thing as
+ * n % d for any unsigned n and d, however it compiles down to only a few
+ * multiplications, so it should be faster than plain uint32_t modulo if the
+ * same divisor is used many times.
+ */
+
+#define REMAINDER_MAGIC(divisor) \
+   ((uint64_t) ~0ull / (divisor) + 1)
+
+/*
+ * Get bits 64-96 of a 32x64-bit multiply. If __int128_t is available, we use
+ * it, which usually compiles down to one instruction on 64-bit architectures.
+ * Otherwise on 32-bit architectures we usually get four instructions (one
+ * 32x32->64 multiply, one 32x32->32 multiply, and one 64-bit add).
+ */
+
+static inline uint32_t
+_mul32by64_hi(uint32_t a, uint64_t b)
+{
+#ifdef HAVE_UINT128
+   return ((__uint128_t) b * a) >> 64;
+#else
+   /*
+    * Let b = b0 + 2^32 * b1. Then a * b = a * b0 + 2^32 * a * b1. We would
+    * have to do a 96-bit addition to get the full result, except that only
+    * one term has non-zero lower 32 bits, which means that to get the high 32
+    * bits, we only have to add the high 64 bits of each term. Unfortunately,
+    * we have to do the 64-bit addition in case the low 32 bits overflow.
+    */
+   uint32_t b0 = (uint32_t) b;
+   uint32_t b1 = b >> 32;
+   return ((((uint64_t) a * b0) >> 32) + (uint64_t) a * b1) >> 32;
+#endif
+}
+
+static inline uint32_t
+util_fast_urem32(uint32_t n, uint32_t d, uint64_t magic)
+{
+   uint64_t lowbits = magic * n;
+   uint32_t result = _mul32by64_hi(d, lowbits);
+   assert(result == n % d);
+   return result;
+}
+
diff --git a/src/mesa/util/hash_table.c b/src/mesa/util/hash_table.c
new file mode 100644
index 0000000..1811ee7
--- /dev/null
+++ b/src/mesa/util/hash_table.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ * Copyright © 1988-2004 Keith Packard and Bart Massey.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors
+ * or their institutions shall not be used in advertising or
+ * otherwise to promote the sale, use or other dealings in this
+ * Software without prior written authorization from the
+ * authors.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/**
+ * Implements an open-addressing, linear-reprobing hash table.
+ *
+ * For more information, see:
+ *
+ * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "hash_table.h"
+#include "ralloc.h"
+#include "macros.h"
+#include "u_memory.h"
+#include "fast_urem_by_const.h"
+
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+
+/**
+ * Magic number that gets stored outside of the struct hash_table.
+ *
+ * The hash table needs a particular pointer to be the marker for a key that
+ * was deleted from the table, along with NULL for the "never allocated in the
+ * table" marker. Legacy GL allows any GLuint to be used as a GL object name,
+ * and we use a 1:1 mapping from GLuints to key pointers, so we need to be
+ * able to track a GLuint that happens to match the deleted key outside of
+ * struct hash_table. We tell the hash table to use "1" as the deleted key
+ * value, so that we test the deleted-key-in-the-table path as best we can.
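+ *
+ * A sketch of how such a GLuint name becomes a key (uint_key() is the
+ * helper defined just below; "name" and "obj" are illustrative):
+ *
+ *   _mesa_hash_table_set_deleted_key(ht, uint_key(DELETED_KEY_VALUE));
+ *   _mesa_hash_table_insert(ht, uint_key(name), obj);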
+ */ +#define DELETED_KEY_VALUE 1 + +static inline void * +uint_key(unsigned id) +{ + return (void *)(uintptr_t) id; +} + +static const uint32_t deleted_key_value; + +/** + * From Knuth -- a good choice for hash/rehash values is p, p-2 where + * p and p-2 are both prime. These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ +static const struct { + uint32_t max_entries, size, rehash; + uint64_t size_magic, rehash_magic; +} hash_sizes[] = { +#define ENTRY(max_entries, size, rehash) \ + { max_entries, size, rehash, \ + REMAINDER_MAGIC(size), REMAINDER_MAGIC(rehash) } + + ENTRY(2, 5, 3 ), + ENTRY(4, 7, 5 ), + ENTRY(8, 13, 11 ), + ENTRY(16, 19, 17 ), + ENTRY(32, 43, 41 ), + ENTRY(64, 73, 71 ), + ENTRY(128, 151, 149 ), + ENTRY(256, 283, 281 ), + ENTRY(512, 571, 569 ), + ENTRY(1024, 1153, 1151 ), + ENTRY(2048, 2269, 2267 ), + ENTRY(4096, 4519, 4517 ), + ENTRY(8192, 9013, 9011 ), + ENTRY(16384, 18043, 18041 ), + ENTRY(32768, 36109, 36107 ), + ENTRY(65536, 72091, 72089 ), + ENTRY(131072, 144409, 144407 ), + ENTRY(262144, 288361, 288359 ), + ENTRY(524288, 576883, 576881 ), + ENTRY(1048576, 1153459, 1153457 ), + ENTRY(2097152, 2307163, 2307161 ), + ENTRY(4194304, 4613893, 4613891 ), + ENTRY(8388608, 9227641, 9227639 ), + ENTRY(16777216, 18455029, 18455027 ), + ENTRY(33554432, 36911011, 36911009 ), + ENTRY(67108864, 73819861, 73819859 ), + ENTRY(134217728, 147639589, 147639587 ), + ENTRY(268435456, 295279081, 295279079 ), + ENTRY(536870912, 590559793, 590559791 ), + ENTRY(1073741824, 1181116273, 1181116271 ), + ENTRY(2147483648ul, 2362232233ul, 2362232231ul ) +}; + +ASSERTED static inline bool +key_pointer_is_reserved(const struct hash_table *ht, const void *key) +{ + return key == NULL || key == ht->deleted_key; +} + +static int +entry_is_free(const struct hash_entry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key == ht->deleted_key; +} + +static int +entry_is_present(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key != NULL && entry->key != ht->deleted_key; +} + +bool +_mesa_hash_table_init(struct hash_table *ht, + void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + ht->size_index = 0; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->key_hash_function = key_hash_function; + ht->key_equals_function = key_equals_function; + ht->table = rzalloc_array(mem_ctx, struct hash_entry, ht->size); + ht->entries = 0; + ht->deleted_entries = 0; + ht->deleted_key = &deleted_key_value; + + return ht->table != NULL; +} + +struct hash_table * +_mesa_hash_table_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + struct hash_table *ht; + + /* mem_ctx is used to allocate the hash table, but the hash table is used + * to allocate all of the suballocations. 
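+    *
+    * A lifetime sketch (illustrative; passing NULL makes the table its own
+    * root context):
+    *
+    *   struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_string,
+    *                                                   _mesa_key_string_equal);
+    *   ...
+    *   _mesa_hash_table_destroy(ht, NULL);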
+ */ + ht = ralloc(mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + if (!_mesa_hash_table_init(ht, ht, key_hash_function, key_equals_function)) { + ralloc_free(ht); + return NULL; + } + + return ht; +} + +static uint32_t +key_u32_hash(const void *key) +{ + uint32_t u = (uint32_t)(uintptr_t)key; + return _mesa_hash_uint(&u); +} + +static bool +key_u32_equals(const void *a, const void *b) +{ + return (uint32_t)(uintptr_t)a == (uint32_t)(uintptr_t)b; +} + +/* key == 0 and key == deleted_key are not allowed */ +struct hash_table * +_mesa_hash_table_create_u32_keys(void *mem_ctx) +{ + return _mesa_hash_table_create(mem_ctx, key_u32_hash, key_u32_equals); +} + +struct hash_table * +_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx) +{ + struct hash_table *ht; + + ht = ralloc(dst_mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + memcpy(ht, src, sizeof(struct hash_table)); + + ht->table = ralloc_array(ht, struct hash_entry, ht->size); + if (ht->table == NULL) { + ralloc_free(ht); + return NULL; + } + + memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry)); + + return ht; +} + +/** + * Frees the given hash table. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void +_mesa_hash_table_destroy(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + if (delete_function) { + hash_table_foreach(ht, entry) { + delete_function(entry); + } + } + ralloc_free(ht); +} + +static void +hash_table_clear_fast(struct hash_table *ht) +{ + memset(ht->table, 0, sizeof(struct hash_entry) * hash_sizes[ht->size_index].size); + ht->entries = ht->deleted_entries = 0; +} + +/** + * Deletes all entries of the given hash table without deleting the table + * itself or changing its structure. + * + * If delete_function is passed, it gets called on each entry present. + */ +void +_mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + struct hash_entry *entry; + + if (delete_function) { + for (entry = ht->table; entry != ht->table + ht->size; entry++) { + if (entry_is_present(ht, entry)) + delete_function(entry); + + entry->key = NULL; + } + ht->entries = 0; + ht->deleted_entries = 0; + } else + hash_table_clear_fast(ht); +} + +/** Sets the value of the key pointer used for deleted entries in the table. + * + * The assumption is that usually keys are actual pointers, so we use a + * default value of a pointer to an arbitrary piece of storage in the library. + * But in some cases a consumer wants to store some other sort of value in the + * table, like a uint32_t, in which case that pointer may conflict with one of + * their valid keys. This lets that user select a safe value. + * + * This must be called before any keys are actually deleted from the table. 
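+ *
+ * For example (a sketch; the sentinel only needs a stable address that is
+ * never used as a real key):
+ *
+ *   static const char deleted_sentinel;
+ *   _mesa_hash_table_set_deleted_key(ht, &deleted_sentinel);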
+ */ +void +_mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key) +{ + ht->deleted_key = deleted_key; +} + +static struct hash_entry * +hash_table_search(struct hash_table *ht, uint32_t hash, const void *key) +{ + assert(!key_pointer_is_reserved(ht, key)); + + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + + do { + struct hash_entry *entry = ht->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(ht, entry) && entry->hash == hash) { + if (ht->key_equals_function(key, entry->key)) { + return entry; + } + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_hash_address); + + return NULL; +} + +/** + * Finds a hash table entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. + */ +struct hash_entry * +_mesa_hash_table_search(struct hash_table *ht, const void *key) +{ + assert(ht->key_hash_function); + return hash_table_search(ht, ht->key_hash_function(key), key); +} + +struct hash_entry * +_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key) +{ + assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key)); + return hash_table_search(ht, hash, key); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data); + +static void +hash_table_insert_rehash(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + do { + struct hash_entry *entry = ht->table + hash_address; + + if (likely(entry->key == NULL)) { + entry->hash = hash; + entry->key = key; + entry->data = data; + return; + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (true); +} + +static void +_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index) +{ + struct hash_table old_ht; + struct hash_entry *table; + + if (ht->size_index == new_size_index && ht->deleted_entries == ht->max_entries) { + hash_table_clear_fast(ht); + assert(!ht->entries); + return; + } + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = rzalloc_array(ralloc_parent(ht->table), struct hash_entry, + hash_sizes[new_size_index].size); + if (table == NULL) + return; + + old_ht = *ht; + + ht->table = table; + ht->size_index = new_size_index; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->entries = 0; + ht->deleted_entries = 0; + + hash_table_foreach(&old_ht, entry) { + hash_table_insert_rehash(ht, entry->hash, entry->key, entry->data); + } + + ht->entries = old_ht.entries; + + ralloc_free(old_ht.table); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + struct hash_entry *available_entry = NULL; + + 
+   assert(!key_pointer_is_reserved(ht, key));
+
+   if (ht->entries >= ht->max_entries) {
+      _mesa_hash_table_rehash(ht, ht->size_index + 1);
+   } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
+      _mesa_hash_table_rehash(ht, ht->size_index);
+   }
+
+   uint32_t size = ht->size;
+   uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic);
+   uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash,
+                                               ht->rehash_magic);
+   uint32_t hash_address = start_hash_address;
+   do {
+      struct hash_entry *entry = ht->table + hash_address;
+
+      if (!entry_is_present(ht, entry)) {
+         /* Stash the first available entry we find */
+         if (available_entry == NULL)
+            available_entry = entry;
+         if (entry_is_free(entry))
+            break;
+      }
+
+      /* Implement replacement when another insert happens
+       * with a matching key. This is a relatively common
+       * feature of hash tables, with the alternative
+       * generally being "insert the new value as well, and
+       * return it first when the key is searched for".
+       *
+       * Note that the hash table doesn't have a delete
+       * callback. If freeing of old data pointers is
+       * required to avoid memory leaks, perform a search
+       * before inserting.
+       */
+      if (!entry_is_deleted(ht, entry) &&
+          entry->hash == hash &&
+          ht->key_equals_function(key, entry->key)) {
+         entry->key = key;
+         entry->data = data;
+         return entry;
+      }
+
+      hash_address += double_hash;
+      if (hash_address >= size)
+         hash_address -= size;
+   } while (hash_address != start_hash_address);
+
+   if (available_entry) {
+      if (entry_is_deleted(ht, available_entry))
+         ht->deleted_entries--;
+      available_entry->hash = hash;
+      available_entry->key = key;
+      available_entry->data = data;
+      ht->entries++;
+      return available_entry;
+   }
+
+   /* We could hit here if a required resize failed. An unchecked-malloc
+    * application could ignore this result.
+    */
+   return NULL;
+}
+
+/**
+ * Inserts the key with the given hash into the table.
+ *
+ * Note that insertion may rearrange the table on a resize or rehash,
+ * so previously found hash_entries are no longer valid after this function.
+ */
+struct hash_entry *
+_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data)
+{
+   assert(ht->key_hash_function);
+   return hash_table_insert(ht, ht->key_hash_function(key), key, data);
+}
+
+struct hash_entry *
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data)
+{
+   assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key));
+   return hash_table_insert(ht, hash, key, data);
+}
+
+/**
+ * This function deletes the given hash table entry.
+ *
+ * Note that deletion doesn't otherwise modify the table, so an iteration over
+ * the table deleting entries is safe.
+ */
+void
+_mesa_hash_table_remove(struct hash_table *ht,
+                        struct hash_entry *entry)
+{
+   if (!entry)
+      return;
+
+   entry->key = ht->deleted_key;
+   ht->entries--;
+   ht->deleted_entries++;
+}
+
+/**
+ * Removes the entry with the corresponding key, if it exists.
+ */
+void _mesa_hash_table_remove_key(struct hash_table *ht,
+                                 const void *key)
+{
+   _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
+}
+
+/**
+ * This function is an iterator over the hash_table when no deleted entries are present.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop.
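+ *
+ * Loop shape, as a sketch (do_something is a placeholder; the
+ * hash_table_foreach_remove macro in hash_table.h wraps this pattern):
+ *
+ *   for (struct hash_entry *e = _mesa_hash_table_next_entry_unsafe(ht, NULL);
+ *        e != NULL; e = _mesa_hash_table_next_entry_unsafe(ht, e))
+ *      do_something(e->data);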
+ */
+struct hash_entry *
+_mesa_hash_table_next_entry_unsafe(const struct hash_table *ht, struct hash_entry *entry)
+{
+   assert(!ht->deleted_entries);
+   if (!ht->entries)
+      return NULL;
+   if (entry == NULL)
+      entry = ht->table;
+   else
+      entry = entry + 1;
+   if (entry != ht->table + ht->size)
+      return entry->key ? entry : _mesa_hash_table_next_entry_unsafe(ht, entry);
+
+   return NULL;
+}
+
+/**
+ * This function is an iterator over the hash table.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop. Note that
+ * an iteration over the table is O(table_size) not O(entries).
+ */
+struct hash_entry *
+_mesa_hash_table_next_entry(struct hash_table *ht,
+                            struct hash_entry *entry)
+{
+   if (entry == NULL)
+      entry = ht->table;
+   else
+      entry = entry + 1;
+
+   for (; entry != ht->table + ht->size; entry++) {
+      if (entry_is_present(ht, entry)) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
+
+/**
+ * Returns a random entry from the hash table.
+ *
+ * This may be useful in implementing random replacement (as opposed
+ * to just removing everything) in caches based on this hash table
+ * implementation. @predicate may be used to filter entries, or may
+ * be set to NULL for no filtering.
+ */
+struct hash_entry *
+_mesa_hash_table_random_entry(struct hash_table *ht,
+                              bool (*predicate)(struct hash_entry *entry))
+{
+   struct hash_entry *entry;
+   uint32_t i = rand() % ht->size;
+
+   if (ht->entries == 0)
+      return NULL;
+
+   for (entry = ht->table + i; entry != ht->table + ht->size; entry++) {
+      if (entry_is_present(ht, entry) &&
+          (!predicate || predicate(entry))) {
+         return entry;
+      }
+   }
+
+   for (entry = ht->table; entry != ht->table + i; entry++) {
+      if (entry_is_present(ht, entry) &&
+          (!predicate || predicate(entry))) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
+
+
+uint32_t
+_mesa_hash_data(const void *data, size_t size)
+{
+   return XXH32(data, size, 0);
+}
+
+uint32_t
+_mesa_hash_data_with_seed(const void *data, size_t size, uint32_t seed)
+{
+   return XXH32(data, size, seed);
+}
+
+uint32_t
+_mesa_hash_int(const void *key)
+{
+   return XXH32(key, sizeof(int), 0);
+}
+
+uint32_t
+_mesa_hash_uint(const void *key)
+{
+   return XXH32(key, sizeof(unsigned), 0);
+}
+
+uint32_t
+_mesa_hash_u32(const void *key)
+{
+   return XXH32(key, 4, 0);
+}
+
+/** String hash (xxhash-based; XXH64 truncated to 32 bits on 64-bit targets) */
+uint32_t
+_mesa_hash_string(const void *_key)
+{
+   uint32_t hash = 0;
+   const char *key = _key;
+   size_t len = strlen(key);
+#if defined(_WIN64) || defined(__x86_64__)
+   hash = (uint32_t)XXH64(key, len, hash);
+#else
+   hash = XXH32(key, len, hash);
+#endif
+   return hash;
+}
+
+uint32_t
+_mesa_hash_pointer(const void *pointer)
+{
+   uintptr_t num = (uintptr_t) pointer;
+   return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14));
+}
+
+bool
+_mesa_key_int_equal(const void *a, const void *b)
+{
+   return *((const int *)a) == *((const int *)b);
+}
+
+bool
+_mesa_key_uint_equal(const void *a, const void *b)
+{
+   return *((const unsigned *)a) == *((const unsigned *)b);
+}
+
+bool
+_mesa_key_u32_equal(const void *a, const void *b)
+{
+   return *((const uint32_t *)a) == *((const uint32_t *)b);
+}
+
+/**
+ * String compare function for use as the comparison callback in
+ * _mesa_hash_table_create().
+ */
+bool
+_mesa_key_string_equal(const void *a, const void *b)
+{
+   return strcmp(a, b) == 0;
+}
+
+bool
+_mesa_key_pointer_equal(const void *a, const void *b)
+{
+   return a == b;
+}
+
+/**
+ * Helper to create a hash table with pointer keys.
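+ *
+ * Usage sketch (key and val stand for arbitrary pointers with stable
+ * identity; illustrative only):
+ *
+ *   struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);
+ *   _mesa_hash_table_insert(ht, key, val);
+ *   struct hash_entry *e = _mesa_hash_table_search(ht, key);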
+ */ +struct hash_table * +_mesa_pointer_hash_table_create(void *mem_ctx) +{ + return _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + + +bool +_mesa_hash_table_reserve(struct hash_table *ht, unsigned size) +{ + if (size < ht->max_entries) + return true; + for (unsigned i = ht->size_index + 1; i < ARRAY_SIZE(hash_sizes); i++) { + if (hash_sizes[i].max_entries >= size) { + _mesa_hash_table_rehash(ht, i); + break; + } + } + return ht->max_entries >= size; +} + +/** + * Hash table wrapper which supports 64-bit keys. + * + * TODO: unify all hash table implementations. + */ + +struct hash_key_u64 { + uint64_t value; +}; + +static uint32_t +key_u64_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct hash_key_u64)); +} + +static bool +key_u64_equals(const void *a, const void *b) +{ + const struct hash_key_u64 *aa = a; + const struct hash_key_u64 *bb = b; + + return aa->value == bb->value; +} + +#define FREED_KEY_VALUE 0 + +struct hash_table_u64 * +_mesa_hash_table_u64_create(void *mem_ctx) +{ + STATIC_ASSERT(FREED_KEY_VALUE != DELETED_KEY_VALUE); + struct hash_table_u64 *ht; + + ht = CALLOC_STRUCT(hash_table_u64); + if (!ht) + return NULL; + + if (sizeof(void *) == 8) { + ht->table = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } else { + ht->table = _mesa_hash_table_create(mem_ctx, key_u64_hash, + key_u64_equals); + } + + if (ht->table) + _mesa_hash_table_set_deleted_key(ht->table, uint_key(DELETED_KEY_VALUE)); + + return ht; +} + +static void +_mesa_hash_table_u64_delete_key(struct hash_entry *entry) +{ + if (sizeof(void *) == 8) + return; + + struct hash_key_u64 *_key = (struct hash_key_u64 *)entry->key; + + if (_key) + free(_key); +} + +void +_mesa_hash_table_u64_clear(struct hash_table_u64 *ht) +{ + if (!ht) + return; + + _mesa_hash_table_clear(ht->table, _mesa_hash_table_u64_delete_key); +} + +void +_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht) +{ + if (!ht) + return; + + _mesa_hash_table_u64_clear(ht); + _mesa_hash_table_destroy(ht->table, NULL); + free(ht); +} + +void +_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key, + void *data) +{ + if (key == FREED_KEY_VALUE) { + ht->freed_key_data = data; + return; + } + + if (key == DELETED_KEY_VALUE) { + ht->deleted_key_data = data; + return; + } + + if (sizeof(void *) == 8) { + _mesa_hash_table_insert(ht->table, (void *)(uintptr_t)key, data); + } else { + struct hash_key_u64 *_key = CALLOC_STRUCT(hash_key_u64); + + if (!_key) + return; + _key->value = key; + + _mesa_hash_table_insert(ht->table, _key, data); + } +} + +static struct hash_entry * +hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key) +{ + if (sizeof(void *) == 8) { + return _mesa_hash_table_search(ht->table, (void *)(uintptr_t)key); + } else { + struct hash_key_u64 _key = { .value = key }; + return _mesa_hash_table_search(ht->table, &_key); + } +} + +void * +_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key) +{ + struct hash_entry *entry; + + if (key == FREED_KEY_VALUE) + return ht->freed_key_data; + + if (key == DELETED_KEY_VALUE) + return ht->deleted_key_data; + + entry = hash_table_u64_search(ht, key); + if (!entry) + return NULL; + + return entry->data; +} + +void +_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key) +{ + struct hash_entry *entry; + + if (key == FREED_KEY_VALUE) { + ht->freed_key_data = NULL; + return; + } + + if (key == DELETED_KEY_VALUE) { + ht->deleted_key_data = NULL; + return; + } + + entry = 
hash_table_u64_search(ht, key);
+   if (!entry)
+      return;
+
+   if (sizeof(void *) == 8) {
+      _mesa_hash_table_remove(ht->table, entry);
+   } else {
+      struct hash_key_u64 *_key = (struct hash_key_u64 *)entry->key;
+
+      _mesa_hash_table_remove(ht->table, entry);
+      free(_key);
+   }
+}
diff --git a/src/mesa/util/hash_table.h b/src/mesa/util/hash_table.h
new file mode 100644
index 0000000..8079d10
--- /dev/null
+++ b/src/mesa/util/hash_table.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifndef _HASH_TABLE_H
+#define _HASH_TABLE_H
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include "c99_compat.h"
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hash_entry {
+   uint32_t hash;
+   const void *key;
+   void *data;
+};
+
+struct hash_table {
+   struct hash_entry *table;
+   uint32_t (*key_hash_function)(const void *key);
+   bool (*key_equals_function)(const void *a, const void *b);
+   const void *deleted_key;
+   uint32_t size;
+   uint32_t rehash;
+   uint64_t size_magic;
+   uint64_t rehash_magic;
+   uint32_t max_entries;
+   uint32_t size_index;
+   uint32_t entries;
+   uint32_t deleted_entries;
+};
+
+struct hash_table *
+_mesa_hash_table_create(void *mem_ctx,
+                        uint32_t (*key_hash_function)(const void *key),
+                        bool (*key_equals_function)(const void *a,
+                                                    const void *b));
+
+bool
+_mesa_hash_table_init(struct hash_table *ht,
+                      void *mem_ctx,
+                      uint32_t (*key_hash_function)(const void *key),
+                      bool (*key_equals_function)(const void *a,
+                                                  const void *b));
+
+struct hash_table *
+_mesa_hash_table_create_u32_keys(void *mem_ctx);
+
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
+void _mesa_hash_table_destroy(struct hash_table *ht,
+                              void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_clear(struct hash_table *ht,
+                            void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
+                                      const void *deleted_key);
+
+static inline uint32_t _mesa_hash_table_num_entries(struct hash_table *ht)
+{
+   return ht->entries;
+}
+
+struct hash_entry *
+_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
+struct hash_entry *
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data);
+struct hash_entry *
+_mesa_hash_table_search(struct hash_table *ht, const void *key);
+struct hash_entry *
+_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key); +void _mesa_hash_table_remove(struct hash_table *ht, + struct hash_entry *entry); +void _mesa_hash_table_remove_key(struct hash_table *ht, + const void *key); + +struct hash_entry *_mesa_hash_table_next_entry(struct hash_table *ht, + struct hash_entry *entry); +struct hash_entry *_mesa_hash_table_next_entry_unsafe(const struct hash_table *ht, + struct hash_entry *entry); +struct hash_entry * +_mesa_hash_table_random_entry(struct hash_table *ht, + bool (*predicate)(struct hash_entry *entry)); + +uint32_t _mesa_hash_data(const void *data, size_t size); +uint32_t _mesa_hash_data_with_seed(const void *data, size_t size, uint32_t seed); + +uint32_t _mesa_hash_int(const void *key); +uint32_t _mesa_hash_uint(const void *key); +uint32_t _mesa_hash_u32(const void *key); +uint32_t _mesa_hash_string(const void *key); +uint32_t _mesa_hash_pointer(const void *pointer); + +bool _mesa_key_int_equal(const void *a, const void *b); +bool _mesa_key_uint_equal(const void *a, const void *b); +bool _mesa_key_u32_equal(const void *a, const void *b); +bool _mesa_key_string_equal(const void *a, const void *b); +bool _mesa_key_pointer_equal(const void *a, const void *b); + +struct hash_table * +_mesa_pointer_hash_table_create(void *mem_ctx); + +bool +_mesa_hash_table_reserve(struct hash_table *ht, unsigned size); +/** + * This foreach function is safe against deletion (which just replaces + * an entry's data with the deleted marker), but not against insertion + * (which may rehash the table, making entry a dangling pointer). + */ +#define hash_table_foreach(ht, entry) \ + for (struct hash_entry *entry = _mesa_hash_table_next_entry(ht, NULL); \ + entry != NULL; \ + entry = _mesa_hash_table_next_entry(ht, entry)) +/** + * This foreach function destroys the table as it iterates. + * It is not safe to use when inserting or removing entries. + */ +#define hash_table_foreach_remove(ht, entry) \ + for (struct hash_entry *entry = _mesa_hash_table_next_entry_unsafe(ht, NULL); \ + (ht)->entries; \ + entry->hash = 0, entry->key = (void*)NULL, entry->data = NULL, \ + (ht)->entries--, entry = _mesa_hash_table_next_entry_unsafe(ht, entry)) + +static inline void +hash_table_call_foreach(struct hash_table *ht, + void (*callback)(const void *key, + void *data, + void *closure), + void *closure) +{ + hash_table_foreach(ht, entry) + callback(entry->key, entry->data, closure); +} + +/** + * Hash table wrapper which supports 64-bit keys. 
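+ *
+ * Usage sketch (the value 42 is illustrative; keys 0 and 1 are handled
+ * specially, see hash_table.c):
+ *
+ *   struct hash_table_u64 *ht = _mesa_hash_table_u64_create(NULL);
+ *   _mesa_hash_table_u64_insert(ht, 42, data);
+ *   void *found = _mesa_hash_table_u64_search(ht, 42);
+ *   _mesa_hash_table_u64_destroy(ht);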
+ */
+struct hash_table_u64 {
+   struct hash_table *table;
+   void *freed_key_data;
+   void *deleted_key_data;
+};
+
+struct hash_table_u64 *
+_mesa_hash_table_u64_create(void *mem_ctx);
+
+void
+_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht);
+
+void
+_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key,
+                            void *data);
+
+void *
+_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key);
+
+void
+_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key);
+
+void
+_mesa_hash_table_u64_clear(struct hash_table_u64 *ht);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _HASH_TABLE_H */
diff --git a/src/mesa/util/ralloc.c b/src/mesa/util/ralloc.c
new file mode 100644
index 0000000..4c2cf07
--- /dev/null
+++ b/src/mesa/util/ralloc.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+#include "util/u_math.h"
+
+/* Some versions of MinGW are missing _vscprintf's declaration, although they
+ * still provide the symbol in the import library. */
+#ifdef __MINGW32__
+_CRTIMP int _vscprintf(const char *format, va_list argptr);
+#endif
+
+#include "ralloc.h"
+
+#ifndef va_copy
+#ifdef __va_copy
+#define va_copy(dest, src) __va_copy((dest), (src))
+#else
+#define va_copy(dest, src) (dest) = (src)
+#endif
+#endif
+
+#define CANARY 0x5A1106
+
+/* Align the header's size so that ralloc() allocations will return with the
+ * same alignment as a libc malloc would have (8 on 32-bit GLIBC, 16 on
+ * 64-bit), avoiding performance penalties on x86 and alignment faults on
+ * ARM.
+ */
+struct
+#ifdef _MSC_VER
+#if _WIN64
+__declspec(align(16))
+#else
+ __declspec(align(8))
+#endif
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
+#else
+ __attribute__((aligned(8)))
+#endif
+   ralloc_header
+{
+#ifndef NDEBUG
+   /* A canary value used to determine whether a pointer is ralloc'd.
*/ + unsigned canary; +#endif + + struct ralloc_header *parent; + + /* The first child (head of a linked list) */ + struct ralloc_header *child; + + /* Linked list of siblings */ + struct ralloc_header *prev; + struct ralloc_header *next; + + void (*destructor)(void *); +}; + +typedef struct ralloc_header ralloc_header; + +static void unlink_block(ralloc_header *info); +static void unsafe_free(ralloc_header *info); + +static ralloc_header * +get_header(const void *ptr) +{ + ralloc_header *info = (ralloc_header *) (((char *) ptr) - + sizeof(ralloc_header)); + assert(info->canary == CANARY); + return info; +} + +#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header)) + +static void +add_child(ralloc_header *parent, ralloc_header *info) +{ + if (parent != NULL) { + info->parent = parent; + info->next = parent->child; + parent->child = info; + + if (info->next != NULL) + info->next->prev = info; + } +} + +void * +ralloc_context(const void *ctx) +{ + return ralloc_size(ctx, 0); +} + +void * +ralloc_size(const void *ctx, size_t size) +{ + /* Some malloc allocation doesn't always align to 16 bytes even on 64 bits + * system, from Android bionic/tests/malloc_test.cpp: + * - Allocations of a size that rounds up to a multiple of 16 bytes + * must have at least 16 byte alignment. + * - Allocations of a size that rounds up to a multiple of 8 bytes and + * not 16 bytes, are only required to have at least 8 byte alignment. + */ + void *block = malloc(align64(size + sizeof(ralloc_header), + alignof(ralloc_header))); + ralloc_header *info; + ralloc_header *parent; + + if (unlikely(block == NULL)) + return NULL; + + info = (ralloc_header *) block; + /* measurements have shown that calloc is slower (because of + * the multiplication overflow checking?), so clear things + * manually + */ + info->parent = NULL; + info->child = NULL; + info->prev = NULL; + info->next = NULL; + info->destructor = NULL; + + parent = ctx != NULL ? get_header(ctx) : NULL; + + add_child(parent, info); + +#ifndef NDEBUG + info->canary = CANARY; +#endif + + return PTR_FROM_HEADER(info); +} + +void * +rzalloc_size(const void *ctx, size_t size) +{ + void *ptr = ralloc_size(ctx, size); + + if (likely(ptr)) + memset(ptr, 0, size); + + return ptr; +} + +/* helper function - assumes ptr != NULL */ +static void * +resize(void *ptr, size_t size) +{ + ralloc_header *child, *old, *info; + + old = get_header(ptr); + info = realloc(old, align64(size + sizeof(ralloc_header), + alignof(ralloc_header))); + + if (info == NULL) + return NULL; + + /* Update parent and sibling's links to the reallocated node. 
*/ + if (info != old && info->parent != NULL) { + if (info->parent->child == old) + info->parent->child = info; + + if (info->prev != NULL) + info->prev->next = info; + + if (info->next != NULL) + info->next->prev = info; + } + + /* Update child->parent links for all children */ + for (child = info->child; child != NULL; child = child->next) + child->parent = info; + + return PTR_FROM_HEADER(info); +} + +void * +reralloc_size(const void *ctx, void *ptr, size_t size) +{ + if (unlikely(ptr == NULL)) + return ralloc_size(ctx, size); + + assert(ralloc_parent(ptr) == ctx); + return resize(ptr, size); +} + +void * +rerzalloc_size(const void *ctx, void *ptr, size_t old_size, size_t new_size) +{ + if (unlikely(ptr == NULL)) + return rzalloc_size(ctx, new_size); + + assert(ralloc_parent(ptr) == ctx); + ptr = resize(ptr, new_size); + + if (new_size > old_size) + memset((char *)ptr + old_size, 0, new_size - old_size); + + return ptr; +} + +void * +ralloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return ralloc_size(ctx, size * count); +} + +void * +rzalloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return rzalloc_size(ctx, size * count); +} + +void * +reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return reralloc_size(ctx, ptr, size * count); +} + +void * +rerzalloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned old_count, unsigned new_count) +{ + if (new_count > SIZE_MAX/size) + return NULL; + + return rerzalloc_size(ctx, ptr, size * old_count, size * new_count); +} + +void +ralloc_free(void *ptr) +{ + ralloc_header *info; + + if (ptr == NULL) + return; + + info = get_header(ptr); + unlink_block(info); + unsafe_free(info); +} + +static void +unlink_block(ralloc_header *info) +{ + /* Unlink from parent & siblings */ + if (info->parent != NULL) { + if (info->parent->child == info) + info->parent->child = info->next; + + if (info->prev != NULL) + info->prev->next = info->next; + + if (info->next != NULL) + info->next->prev = info->prev; + } + info->parent = NULL; + info->prev = NULL; + info->next = NULL; +} + +static void +unsafe_free(ralloc_header *info) +{ + /* Recursively free any children...don't waste time unlinking them. */ + ralloc_header *temp; + while (info->child != NULL) { + temp = info->child; + info->child = temp->next; + unsafe_free(temp); + } + + /* Free the block itself. Call the destructor first, if any. */ + if (info->destructor != NULL) + info->destructor(PTR_FROM_HEADER(info)); + + free(info); +} + +void +ralloc_steal(const void *new_ctx, void *ptr) +{ + ralloc_header *info, *parent; + + if (unlikely(ptr == NULL)) + return; + + info = get_header(ptr); + parent = new_ctx ? get_header(new_ctx) : NULL; + + unlink_block(info); + + add_child(parent, info); +} + +void +ralloc_adopt(const void *new_ctx, void *old_ctx) +{ + ralloc_header *new_info, *old_info, *child; + + if (unlikely(old_ctx == NULL)) + return; + + old_info = get_header(old_ctx); + new_info = get_header(new_ctx); + + /* If there are no children, bail. */ + if (unlikely(old_info->child == NULL)) + return; + + /* Set all the children's parent to new_ctx; get a pointer to the last child. 
+    */
+   for (child = old_info->child; child->next != NULL; child = child->next) {
+      child->parent = new_info;
+   }
+   child->parent = new_info;
+
+   /* Connect the two lists together; parent them to new_ctx; make old_ctx empty. */
+   child->next = new_info->child;
+   if (child->next)
+      child->next->prev = child;
+   new_info->child = old_info->child;
+   old_info->child = NULL;
+}
+
+void *
+ralloc_parent(const void *ptr)
+{
+   ralloc_header *info;
+
+   if (unlikely(ptr == NULL))
+      return NULL;
+
+   info = get_header(ptr);
+   return info->parent ? PTR_FROM_HEADER(info->parent) : NULL;
+}
+
+void
+ralloc_set_destructor(const void *ptr, void(*destructor)(void *))
+{
+   ralloc_header *info = get_header(ptr);
+   info->destructor = destructor;
+}
+
+char *
+ralloc_strdup(const void *ctx, const char *str)
+{
+   size_t n;
+   char *ptr;
+
+   if (unlikely(str == NULL))
+      return NULL;
+
+   n = strlen(str);
+   ptr = ralloc_array(ctx, char, n + 1);
+   memcpy(ptr, str, n);
+   ptr[n] = '\0';
+   return ptr;
+}
+
+char *
+ralloc_strndup(const void *ctx, const char *str, size_t max)
+{
+   size_t n;
+   char *ptr;
+
+   if (unlikely(str == NULL))
+      return NULL;
+
+   n = strnlen(str, max);
+   ptr = ralloc_array(ctx, char, n + 1);
+   memcpy(ptr, str, n);
+   ptr[n] = '\0';
+   return ptr;
+}
+
+/* helper routine for strcat/strncat - n is the exact amount to copy */
+static bool
+cat(char **dest, const char *str, size_t n)
+{
+   char *both;
+   size_t existing_length;
+   assert(dest != NULL && *dest != NULL);
+
+   existing_length = strlen(*dest);
+   both = resize(*dest, existing_length + n + 1);
+   if (unlikely(both == NULL))
+      return false;
+
+   memcpy(both + existing_length, str, n);
+   both[existing_length + n] = '\0';
+
+   *dest = both;
+   return true;
+}
+
+
+bool
+ralloc_strcat(char **dest, const char *str)
+{
+   return cat(dest, str, strlen(str));
+}
+
+bool
+ralloc_strncat(char **dest, const char *str, size_t n)
+{
+   return cat(dest, str, strnlen(str, n));
+}
+
+bool
+ralloc_str_append(char **dest, const char *str,
+                  size_t existing_length, size_t str_size)
+{
+   char *both;
+   assert(dest != NULL && *dest != NULL);
+
+   both = resize(*dest, existing_length + str_size + 1);
+   if (unlikely(both == NULL))
+      return false;
+
+   memcpy(both + existing_length, str, str_size);
+   both[existing_length + str_size] = '\0';
+
+   *dest = both;
+
+   return true;
+}
+
+char *
+ralloc_asprintf(const void *ctx, const char *fmt, ...)
+{
+   char *ptr;
+   va_list args;
+   va_start(args, fmt);
+   ptr = ralloc_vasprintf(ctx, fmt, args);
+   va_end(args);
+   return ptr;
+}
+
+/* Return the length of the string that would be generated by a printf-style
+ * format and argument list, not including the \0 byte.
+ */
+static size_t
+printf_length(const char *fmt, va_list untouched_args)
+{
+   int size;
+   char junk;
+
+   /* Make a copy of the va_list so the original caller can still use it */
+   va_list args;
+   va_copy(args, untouched_args);
+
+#ifdef _WIN32
+   /* We need to use _vscprintf to calculate the size as vsnprintf returns -1
+    * if the number of characters to write is greater than count.
+    */
+   size = _vscprintf(fmt, args);
+   (void)junk;
+#else
+   size = vsnprintf(&junk, 1, fmt, args);
+#endif
+   assert(size >= 0);
+
+   va_end(args);
+
+   return size;
+}
+
+char *
+ralloc_vasprintf(const void *ctx, const char *fmt, va_list args)
+{
+   size_t size = printf_length(fmt, args) + 1;
+
+   char *ptr = ralloc_size(ctx, size);
+   if (ptr != NULL)
+      vsnprintf(ptr, size, fmt, args);
+
+   return ptr;
+}
+
+bool
+ralloc_asprintf_append(char **str, const char *fmt, ...)
+{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_append(str, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_append(char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return ralloc_vasprintf_rewrite_tail(str, &existing_length, fmt, args); +} + +bool +ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + // Assuming a NULL context is probably bad, but it's expected behavior. + *str = ralloc_vasprintf(NULL, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = resize(*str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/*************************************************************************** + * Linear allocator for short-lived allocations. + *************************************************************************** + * + * The allocator consists of a parent node (2K buffer), which requires + * a ralloc parent, and child nodes (allocations). Child nodes can't be freed + * directly, because the parent doesn't track them. You have to release + * the parent node in order to release all its children. + * + * The allocator uses a fixed-sized buffer with a monotonically increasing + * offset after each allocation. If the buffer is all used, another buffer + * is allocated, sharing the same ralloc parent, so all buffers are at + * the same level in the ralloc hierarchy. + * + * The linear parent node is always the first buffer and keeps track of all + * other buffers. + */ + +#define MIN_LINEAR_BUFSIZE 2048 +#define SUBALLOC_ALIGNMENT 8 +#define LMAGIC 0x87b9c7d3 + +struct +#ifdef _MSC_VER + __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + linear_header { +#ifndef NDEBUG + unsigned magic; /* for debugging */ +#endif + unsigned offset; /* points to the first unused byte in the buffer */ + unsigned size; /* size of the buffer */ + void *ralloc_parent; /* new buffers will use this */ + struct linear_header *next; /* next buffer if we have more */ + struct linear_header *latest; /* the only buffer that has free space */ + + /* After this structure, the buffer begins. + * Each suballocation consists of linear_size_chunk as its header followed + * by the suballocation, so it goes: + * + * - linear_size_chunk + * - allocated space + * - linear_size_chunk + * - allocated space + * etc. + * + * linear_size_chunk is only needed by linear_realloc. + */ +}; + +struct linear_size_chunk { + unsigned size; /* for realloc */ + unsigned _padding; +}; + +typedef struct linear_header linear_header; +typedef struct linear_size_chunk linear_size_chunk; + +#define LINEAR_PARENT_TO_HEADER(parent) \ + (linear_header*) \ + ((char*)(parent) - sizeof(linear_size_chunk) - sizeof(linear_header)) + +/* Allocate the linear buffer with its header. 
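+ * The requested size is rounded up to at least MIN_LINEAR_BUFSIZE, so many
+ * small suballocations can share one underlying buffer.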
*/ +static linear_header * +create_linear_node(void *ralloc_ctx, unsigned min_size) +{ + linear_header *node; + + min_size += sizeof(linear_size_chunk); + + if (likely(min_size < MIN_LINEAR_BUFSIZE)) + min_size = MIN_LINEAR_BUFSIZE; + + node = ralloc_size(ralloc_ctx, sizeof(linear_header) + min_size); + if (unlikely(!node)) + return NULL; + +#ifndef NDEBUG + node->magic = LMAGIC; +#endif + node->offset = 0; + node->size = min_size; + node->ralloc_parent = ralloc_ctx; + node->next = NULL; + node->latest = node; + return node; +} + +void * +linear_alloc_child(void *parent, unsigned size) +{ + linear_header *first = LINEAR_PARENT_TO_HEADER(parent); + linear_header *latest = first->latest; + linear_header *new_node; + linear_size_chunk *ptr; + unsigned full_size; + + assert(first->magic == LMAGIC); + assert(!latest->next); + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + full_size = sizeof(linear_size_chunk) + size; + + if (unlikely(latest->offset + full_size > latest->size)) { + /* allocate a new node */ + new_node = create_linear_node(latest->ralloc_parent, size); + if (unlikely(!new_node)) + return NULL; + + first->latest = new_node; + latest->latest = new_node; + latest->next = new_node; + latest = new_node; + } + + ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset); + ptr->size = size; + latest->offset += full_size; + + assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0); + return &ptr[1]; +} + +void * +linear_alloc_parent(void *ralloc_ctx, unsigned size) +{ + linear_header *node; + + if (unlikely(!ralloc_ctx)) + return NULL; + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + + node = create_linear_node(ralloc_ctx, size); + if (unlikely(!node)) + return NULL; + + return linear_alloc_child((char*)node + + sizeof(linear_header) + + sizeof(linear_size_chunk), size); +} + +void * +linear_zalloc_child(void *parent, unsigned size) +{ + void *ptr = linear_alloc_child(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void * +linear_zalloc_parent(void *parent, unsigned size) +{ + void *ptr = linear_alloc_parent(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void +linear_free_parent(void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + void *ptr = node; + + node = node->next; + ralloc_free(ptr); + } +} + +void +ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + ralloc_steal(new_ralloc_ctx, node); + node->ralloc_parent = new_ralloc_ctx; + node = node->next; + } +} + +void * +ralloc_parent_of_linear_parent(void *ptr) +{ + linear_header *node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + return node->ralloc_parent; +} + +void * +linear_realloc(void *parent, void *old, unsigned new_size) +{ + unsigned old_size = 0; + ralloc_header *new_ptr; + + new_ptr = linear_alloc_child(parent, new_size); + + if (unlikely(!old)) + return new_ptr; + + old_size = ((linear_size_chunk*)old)[-1].size; + + if (likely(new_ptr && old_size)) + memcpy(new_ptr, old, MIN2(old_size, new_size)); + + return new_ptr; +} + +/* All code below is pretty much copied from ralloc and only the alloc + * calls are different. 
+ */ + +char * +linear_strdup(void *parent, const char *str) +{ + unsigned n; + char *ptr; + + if (unlikely(!str)) + return NULL; + + n = strlen(str); + ptr = linear_alloc_child(parent, n + 1); + if (unlikely(!ptr)) + return NULL; + + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +linear_asprintf(void *parent, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = linear_vasprintf(parent, fmt, args); + va_end(args); + return ptr; +} + +char * +linear_vasprintf(void *parent, const char *fmt, va_list args) +{ + unsigned size = printf_length(fmt, args) + 1; + + char *ptr = linear_alloc_child(parent, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +linear_asprintf_append(void *parent, char **str, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_append(parent, str, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_append(void *parent, char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return linear_vasprintf_rewrite_tail(parent, str, &existing_length, fmt, args); +} + +bool +linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_rewrite_tail(parent, str, start, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + *str = linear_vasprintf(parent, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = linear_realloc(parent, *str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +linear_cat(void *parent, char **dest, const char *str, unsigned n) +{ + char *both; + unsigned existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = linear_realloc(parent, *dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] = '\0'; + + *dest = both; + return true; +} + +bool +linear_strcat(void *parent, char **dest, const char *str) +{ + return linear_cat(parent, dest, str, strlen(str)); +} diff --git a/src/mesa/util/ralloc.h b/src/mesa/util/ralloc.h new file mode 100644 index 0000000..857ca5f --- /dev/null +++ b/src/mesa/util/ralloc.h @@ -0,0 +1,604 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ralloc.h
+ *
+ * ralloc: a recursive memory allocator
+ *
+ * The ralloc memory allocator creates a hierarchy of allocated
+ * objects. Every allocation is in reference to some parent, and
+ * every allocated object can in turn be used as the parent of a
+ * subsequent allocation. This allows for extremely convenient
+ * discarding of an entire tree/sub-tree of allocations by calling
+ * ralloc_free on any particular object to free it and all of its
+ * children.
+ *
+ * The conceptual working of ralloc was directly inspired by Andrew
+ * Tridgell's talloc, but ralloc is an independent implementation
+ * released under the MIT license and tuned for Mesa.
+ *
+ * talloc is more sophisticated than ralloc in that it includes reference
+ * counting and useful debugging features. However, it is released under
+ * a non-permissive open source license.
+ */
+
+#ifndef RALLOC_H
+#define RALLOC_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdarg.h>
+
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \def ralloc(ctx, type)
+ * Allocate a new object chained off of the given context.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define ralloc(ctx, type) ((type *) ralloc_size(ctx, sizeof(type)))
+
+/**
+ * \def rzalloc(ctx, type)
+ * Allocate a new object out of the given context and initialize it to zero.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define rzalloc(ctx, type) ((type *) rzalloc_size(ctx, sizeof(type)))
+
+/**
+ * Allocate a new ralloc context.
+ *
+ * While any ralloc'd pointer can be used as a context, sometimes it is useful
+ * to simply allocate a context with no associated memory.
+ *
+ * It is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, 0))
+ * \endcode
+ */
+void *ralloc_context(const void *ctx);
+
+/**
+ * Allocate memory chained off of the given context.
+ *
+ * This is the core allocation routine which is used by all others. It
+ * simply allocates storage for \p size bytes and returns the pointer,
+ * similar to \c malloc.
+ */
+void *ralloc_size(const void *ctx, size_t size) MALLOCLIKE;
+
+/**
+ * Allocate zero-initialized memory chained off of the given context.
+ *
+ * This is similar to \c calloc with a size of 1.
+ */
+void *rzalloc_size(const void *ctx, size_t size) MALLOCLIKE;
+
+/**
+ * Resize a piece of ralloc-managed memory, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * \param ctx  The context to use for new allocation. If \p ptr != NULL,
+ *             it must be the same as ralloc_parent(\p ptr).
+ * \param ptr  Pointer to the memory to be resized. May be NULL.
+ * \param size The amount of memory to allocate, in bytes.
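+ *
+ * A minimal usage sketch (hypothetical names):
+ * \code
+ * char *buf = ralloc_size(ctx, 64);
+ * buf = reralloc_size(ctx, buf, 128);   // contents of the first 64 bytes kept
+ * \endcode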
+ */
+void *reralloc_size(const void *ctx, void *ptr, size_t size);
+
+/**
+ * Resize a ralloc-managed array, preserving data and initializing any newly
+ * allocated data to zero.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * \param ctx      The context to use for new allocation. If \p ptr != NULL,
+ *                 it must be the same as ralloc_parent(\p ptr).
+ * \param ptr      Pointer to the memory to be resized. May be NULL.
+ * \param old_size The amount of memory in the previous allocation, in bytes.
+ * \param new_size The amount of memory to allocate, in bytes.
+ */
+void *rerzalloc_size(const void *ctx, void *ptr,
+                     size_t old_size, size_t new_size);
+
+/// \defgroup array Array Allocators @{
+
+/**
+ * \def ralloc_array(ctx, type, count)
+ * Allocate an array of objects chained off the given context.
+ *
+ * Similar to \c calloc, but does not initialize the memory to zero.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_array_size(ctx, sizeof(type), count))
+ * \endcode
+ */
+#define ralloc_array(ctx, type, count) \
+   ((type *) ralloc_array_size(ctx, sizeof(type), count))
+
+/**
+ * \def rzalloc_array(ctx, type, count)
+ * Allocate a zero-initialized array chained off the given context.
+ *
+ * Similar to \c calloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_array_size(ctx, sizeof(type), count))
+ * \endcode
+ */
+#define rzalloc_array(ctx, type, count) \
+   ((type *) rzalloc_array_size(ctx, sizeof(type), count))
+
+/**
+ * \def reralloc(ctx, ptr, type, count)
+ * Resize a ralloc-managed array, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx   The context to use for new allocation. If \p ptr != NULL,
+ *              it must be the same as ralloc_parent(\p ptr).
+ * \param ptr   Pointer to the array to be resized. May be NULL.
+ * \param type  The element type.
+ * \param count The number of elements to allocate.
+ */
+#define reralloc(ctx, ptr, type, count) \
+   ((type *) reralloc_array_size(ctx, ptr, sizeof(type), count))
+
+/**
+ * \def rerzalloc(ctx, ptr, type, count)
+ * Resize a ralloc-managed array, preserving data and initializing any newly
+ * allocated data to zero.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx       The context to use for new allocation. If \p ptr != NULL,
+ *                  it must be the same as ralloc_parent(\p ptr).
+ * \param ptr       Pointer to the array to be resized. May be NULL.
+ * \param type      The element type.
+ * \param old_count The number of elements in the previous allocation.
+ * \param new_count The number of elements to allocate.
+ */
+#define rerzalloc(ctx, ptr, type, old_count, new_count) \
+   ((type *) rerzalloc_array_size(ctx, ptr, sizeof(type), old_count, new_count))
+
+/**
+ * Allocate memory for an array chained off the given context.
+ *
+ * Similar to \c calloc, but does not initialize the memory to zero.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \p size and \p count. This is necessary for security.
+ */
+void *ralloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE;
+
+/**
+ * Allocate a zero-initialized array chained off the given context.
+ *
+ * Similar to \c calloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \p size and \p count. This is necessary for security.
+ */
+void *rzalloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE;
+
+/**
+ * Resize a ralloc-managed array, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx   The context to use for new allocation. If \p ptr != NULL,
+ *              it must be the same as ralloc_parent(\p ptr).
+ * \param ptr   Pointer to the array to be resized. May be NULL.
+ * \param size  The size of an individual element.
+ * \param count The number of elements to allocate.
+ *
+ * \return The resized pointer, or NULL if allocation failed.
+ */
+void *reralloc_array_size(const void *ctx, void *ptr, size_t size,
+                          unsigned count);
+
+/**
+ * Resize a ralloc-managed array, preserving data and initializing any newly
+ * allocated data to zero.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx       The context to use for new allocation. If \p ptr != NULL,
+ *                  it must be the same as ralloc_parent(\p ptr).
+ * \param ptr       Pointer to the array to be resized. May be NULL.
+ * \param size      The size of an individual element.
+ * \param old_count The number of elements in the previous allocation.
+ * \param new_count The number of elements to allocate.
+ *
+ * \return The resized pointer, or NULL if allocation failed.
+ */
+void *rerzalloc_array_size(const void *ctx, void *ptr, size_t size,
+                           unsigned old_count, unsigned new_count);
+/// @}
+
+/**
+ * Free a piece of ralloc-managed memory.
+ *
+ * This will also free the memory of any children allocated from this context.
+ */
+void ralloc_free(void *ptr);
+
+/**
+ * "Steal" memory from one context, changing it to another.
+ *
+ * This changes \p ptr's context to \p new_ctx. This is quite useful if
+ * memory is allocated out of a temporary context.
+ */
+void ralloc_steal(const void *new_ctx, void *ptr);
+
+/**
+ * Reparent all children from one context to another.
+ *
+ * This effectively calls ralloc_steal(new_ctx, child) for all children of
+ * \p old_ctx.
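+ *
+ * A minimal usage sketch (hypothetical caller code; \c permanent_ctx is any
+ * ralloc context the caller already owns):
+ * \code
+ * void *tmp = ralloc_context(NULL);
+ * char *msg = ralloc_strdup(tmp, "built in a scratch context");
+ * ralloc_adopt(permanent_ctx, tmp);  // msg now lives under permanent_ctx
+ * ralloc_free(tmp);                  // frees only the now-empty scratch context
+ * \endcode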
+ */
+void ralloc_adopt(const void *new_ctx, void *old_ctx);
+
+/**
+ * Return the given pointer's ralloc context.
+ */
+void *ralloc_parent(const void *ptr);
+
+/**
+ * Set a callback to occur just before an object is freed.
+ */
+void ralloc_set_destructor(const void *ptr, void(*destructor)(void *));
+
+/// \defgroup string String Functions @{
+/**
+ * Duplicate a string, allocating the memory from the given context.
+ */
+char *ralloc_strdup(const void *ctx, const char *str) MALLOCLIKE;
+
+/**
+ * Duplicate a string, allocating the memory from the given context.
+ *
+ * Like \c strndup, at most \p n characters are copied. If \p str is longer
+ * than \p n characters, \p n are copied, and a terminating \c '\0' byte is
+ * added.
+ */
+char *ralloc_strndup(const void *ctx, const char *str, size_t n) MALLOCLIKE;
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends \p str to \p *dest, similar to \c strcat, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_strcat(char **dest, const char *str);
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends at most \p n bytes of \p str to \p *dest, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated; \p str does not need to be null
+ * terminated if it is longer than \p n.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_strncat(char **dest, const char *str, size_t n);
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends \p n bytes of \p str to \p *dest, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated.
+ *
+ * This function differs from ralloc_strcat() and ralloc_strncat() in that it
+ * does not do any strlen() calls which can become costly on large strings.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+ralloc_str_append(char **dest, const char *str,
+                  size_t existing_length, size_t str_size);
+
+/**
+ * Print to a string.
+ *
+ * This is analogous to \c sprintf, but allocates enough space (using \p ctx
+ * as the context) for the resulting string.
+ *
+ * \return The newly allocated string.
+ */
+char *ralloc_asprintf (const void *ctx, const char *fmt, ...) PRINTFLIKE(2, 3) MALLOCLIKE;
+
+/**
+ * Print to a string, given a va_list.
+ *
+ * This is analogous to \c vsprintf, but allocates enough space (using \p ctx
+ * as the context) for the resulting string.
+ *
+ * \return The newly allocated string.
+ */
+char *ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) MALLOCLIKE;
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator. Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_asprintf_append
+ *
+ * \param str   The string to be updated.
+ * \param start The index to start appending new data at.
+ * \param fmt   A printf-style formatting string
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_rewrite_tail(char **str, size_t *start,
+                                  const char *fmt, ...)
+                                  PRINTFLIKE(3, 4);
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator. Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_vasprintf_append
+ *
+ * \param str   The string to be updated.
+ * \param start The index to start appending new data at.
+ * \param fmt   A printf-style formatting string
+ * \param args  A va_list containing the data to be formatted
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
+                                   va_list args);
+
+/**
+ * Append formatted text to the supplied string.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_asprintf_rewrite_tail(str, strlen(*str), fmt, ...)
+ * \endcode
+ *
+ * \sa ralloc_asprintf
+ * \sa ralloc_asprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_append (char **str, const char *fmt, ...)
+                             PRINTFLIKE(2, 3);
+
+/**
+ * Append formatted text to the supplied string, given a va_list.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_vasprintf_rewrite_tail(str, strlen(*str), fmt, args)
+ * \endcode
+ *
+ * \sa ralloc_vasprintf
+ * \sa ralloc_vasprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_append(char **str, const char *fmt, va_list args);
+/// @}
+
+/**
+ * Declare C++ new and delete operators which use ralloc.
+ *
+ * Placing this macro in the body of a class makes it possible to do:
+ *
+ * TYPE *var = new(mem_ctx) TYPE(...);
+ * delete var;
+ *
+ * which is more idiomatic in C++ than calling ralloc.
+ */
+#define DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(TYPE, ALLOC_FUNC)           \
+private:                                                                 \
+   static void _ralloc_destructor(void *p)                               \
+   {                                                                     \
+      reinterpret_cast<TYPE *>(p)->TYPE::~TYPE();                        \
+   }                                                                     \
+public:                                                                  \
+   static void* operator new(size_t size, void *mem_ctx)                 \
+   {                                                                     \
+      void *p = ALLOC_FUNC(mem_ctx, size);                               \
+      assert(p != NULL);                                                 \
+      if (!HAS_TRIVIAL_DESTRUCTOR(TYPE))                                 \
+         ralloc_set_destructor(p, _ralloc_destructor);                   \
+      return p;                                                          \
+   }                                                                     \
+                                                                         \
+   static void operator delete(void *p)                                  \
+   {                                                                     \
+      /* The object's destructor is guaranteed to have already been     \
+       * called by the delete operator at this point -- Make sure it's  \
+       * not called again.
\ + */ \ + if (!HAS_TRIVIAL_DESTRUCTOR(TYPE)) \ + ralloc_set_destructor(p, NULL); \ + ralloc_free(p); \ + } + +#define DECLARE_RALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, ralloc_size) + +#define DECLARE_RZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, rzalloc_size) + +#define DECLARE_LINEAR_ALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_alloc_child) + +#define DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_zalloc_child) + + +/** + * Do a fast allocation from the linear buffer, also known as the child node + * from the allocator's point of view. It can't be freed directly. You have + * to free the parent or the ralloc parent. + * + * \param parent parent node of the linear allocator + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_child(void *parent, unsigned size); + +/** + * Allocate a parent node that will hold linear buffers. The returned + * allocation is actually the first child node, but it's also the handle + * of the parent node. Use it for all child node allocations. + * + * \param ralloc_ctx ralloc context, must not be NULL + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Same as linear_alloc_child, but also clears memory. + */ +void *linear_zalloc_child(void *parent, unsigned size); + +/** + * Same as linear_alloc_parent, but also clears memory. + */ +void *linear_zalloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Free the linear parent node. This will free all child nodes too. + * Freeing the ralloc parent will also free this. + */ +void linear_free_parent(void *ptr); + +/** + * Same as ralloc_steal, but steals the linear parent node. + */ +void ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr); + +/** + * Return the ralloc parent of the linear parent node. + */ +void *ralloc_parent_of_linear_parent(void *ptr); + +/** + * Same as realloc except that the linear allocator doesn't free child nodes, + * so it's reduced to memory duplication. It's used in places where + * reallocation is required. Don't use it often. It's much slower than + * realloc. + */ +void *linear_realloc(void *parent, void *old, unsigned new_size); + +/* The functions below have the same semantics as their ralloc counterparts, + * except that they always allocate a linear child node. + */ +char *linear_strdup(void *parent, const char *str); +char *linear_asprintf(void *parent, const char *fmt, ...); +char *linear_vasprintf(void *parent, const char *fmt, va_list args); +bool linear_asprintf_append(void *parent, char **str, const char *fmt, ...); +bool linear_vasprintf_append(void *parent, char **str, const char *fmt, + va_list args); +bool linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...); +bool linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args); +bool linear_strcat(void *parent, char **dest, const char *str); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif diff --git a/src/mesa/util/xxhash.h b/src/mesa/util/xxhash.h new file mode 100644 index 0000000..eb9e865 --- /dev/null +++ b/src/mesa/util/xxhash.h @@ -0,0 +1,1446 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. 
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+Note : SMHasher's CRC32 implementation is not the fastest one.
+Other speed-oriented implementations can be faster,
+especially in combination with PCLMUL instruction :
+http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+/* Mesa leaves strict aliasing on in the compiler, and this code likes to
+ * dereference the passed in data as u32*, which means that the compiler is
+ * free to move the u32 read before the write of the struct members being
+ * hashed, and in practice it did in freedreno. Forcing these two things
+ * prevents it.
+ */ +#define XXH_FORCE_ALIGN_CHECK 0 +#define XXH_FORCE_MEMORY_ACCESS 0 + +#include "util/compiler.h" /* for FALLTHROUGH */ + +#if defined (__cplusplus) +extern "C" { +#endif + + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/* **************************** + * API modifier + ******************************/ +/** XXH_INLINE_ALL (and XXH_PRIVATE_API) + * This build macro includes xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining offers great performance improvement on small keys, + * and dramatic ones when length is expressed as a compile-time constant. + * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html . + * Methodology : + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate object. + */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif +#else +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/*! XXH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. 
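+ *
+ * For example (a hypothetical build, with the prefix name chosen freely):
+ *     cc -DXXH_NAMESPACE=MESA_ -c xxhash.c
+ * Callers keep writing XXH32(...) / XXH64(...); the macros below rename
+ * those symbols to MESA_XXH32 / MESA_XXH64 at compile time.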
+ */
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    7
+#define XXH_VERSION_RELEASE  2
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint32_t XXH32_hash_t;
+#else
+# include <limits.h>
+# if UINT_MAX == 0xFFFFFFFFUL
+    typedef unsigned int XXH32_hash_t;
+# else
+#   if ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
+#   else
+#     error "unsupported platform : need a 32-bit type"
+#   endif
+# endif
+#endif
+
+/*! XXH32() :
+    Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+/******* Streaming *******/
+
+/*
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated, using XXH*_createState().
+ *
+ * Start a new hash by initializing the state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate some new hash values later on, by invoking again
+ * XXH*_digest().
+ *
+ * When done, release the state, using XXH*_freeState().
+ */
+
+typedef struct XXH32_state_s XXH32_state_t;  /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+
+/******* Canonical representation *******/
+
+/* Default return values from XXH functions are basic unsigned 32 and 64 bits.
+ * This is the simplest and fastest format for further post-processing.
+ * However, this leaves open the question of what is the order of bytes,
+ * since little and big endian conventions will write the same number differently.
+ *
+ * The canonical representation settles this issue,
+ * by mandating big-endian convention,
+ * aka, the same convention as human-readable numbers (large digits first).
+ * When writing hash values to storage, sending them over a network, or printing them,
+ * it's highly recommended to use the canonical representation,
+ * to ensure portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values into and from canonical format.
+ */
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint64_t XXH64_hash_t;
+#else
+  /* the following type must have a width of 64-bit */
+  typedef unsigned long long XXH64_hash_t;
+#endif
+
+/*! XXH64() :
+ * Returns the 64-bit hash of sequence of length @length stored at memory address @input.
+ * @seed can be used to alter the result predictably.
+ * This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
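+ *
+ * A minimal call sketch (hypothetical buffer names):
+ *     XXH64_hash_t h = XXH64(buf, buf_len, 0);
+ * where 0 is the seed; any other seed alters the result predictably.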
+ */ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); + +/******* Streaming *******/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + + +#endif /* XXH_NO_LONG_LONG */ + +#endif /* XXHASH_H_5627135585666179 */ + + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* ************************************************************************************************ + This section contains declarations which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! +*************************************************************************************************** */ + +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. */ + +struct XXH32_state_s { + XXH32_hash_t total_len_32; + XXH32_hash_t large_len; + XXH32_hash_t v1; + XXH32_hash_t v2; + XXH32_hash_t v3; + XXH32_hash_t v4; + XXH32_hash_t mem32[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + +struct XXH64_state_s { + XXH64_hash_t total_len; + XXH64_hash_t v1; + XXH64_hash_t v2; + XXH64_hash_t v3; + XXH64_hash_t v4; + XXH64_hash_t mem64[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved32; /* required for padding anyway */ + XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +#endif /* XXH_NO_LONG_LONG */ + +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# define XXH_IMPLEMENTATION +#endif + +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ + + + +/*-********************************************************************** +* xxHash implementation +* Functions implementation used to be hosted within xxhash.c . +* However, code inlining requires to place implementation in the header file. +* As a consequence, xxhash.c used to be included within xxhash.h . +* But some build systems don't like *.c inclusions. +* So the implementation is now directly integrated within xxhash.h . +* Another small advantage is that xxhash.c is no longer required in /includes . 
+************************************************************************/
+
+#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
+   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
+# define XXH_IMPLEM_13a8737387
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ *            See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
+#   define XXH_FORCE_MEMORY_ACCESS 2
+# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+   (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
+#   define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks the input for a null pointer.
+ * If it is, the result for null input pointers is the same as a null-length input.
+ */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#   define XXH_FORCE_ALIGN_CHECK 0
+# else
+#   define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+/*!XXH_REROLL:
+ * Whether to reroll XXH32_finalize, and XXH64_finalize,
+ * instead of using an unrolled jump table/if statement loop.
+ *
+ * This is automatically defined on -Os/-Oz on GCC and Clang. */
+#ifndef XXH_REROLL
+# if defined(__OPTIMIZE_SIZE__)
+#   define XXH_REROLL 1
+# else
+#   define XXH_REROLL 0
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+*   for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#include <limits.h>   /* ULLONG_MAX */
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+# pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+# define XXH_FORCE_INLINE static __forceinline
+# define XXH_NO_INLINE static __declspec(noinline)
+#else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#   ifdef __GNUC__
+#     define XXH_FORCE_INLINE static inline __attribute__((always_inline))
+#     define XXH_NO_INLINE static __attribute__((noinline))
+#   else
+#     define XXH_FORCE_INLINE static inline
+#     define XXH_NO_INLINE static
+#   endif
+# else
+#   define XXH_FORCE_INLINE static
+#   define XXH_NO_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+
+/* *************************************
+* Debug
+***************************************/
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+# define DEBUGLEVEL 0
+#endif
+
+#if (DEBUGLEVEL>=1)
+# include <assert.h>   /* note : can still be disabled with NDEBUG */
+# define XXH_ASSERT(c)   assert(c)
+#else
+# define XXH_ASSERT(c)   ((void)0)
+#endif
+
+/* note : use after variable declarations */
+#define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint8_t xxh_u8;
+#else
+  typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+
+/* ***   Memory access   *** */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    xxh_u32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ***   Endianness   *** */
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+# if defined(_WIN32) /* Windows is always little endian */ \
+   || defined(__LITTLE_ENDIAN__) \
+   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#   define XXH_CPU_LITTLE_ENDIAN 1
+# elif defined(__BIG_ENDIAN__) \
+   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#   define XXH_CPU_LITTLE_ENDIAN 0
+# else
+static int XXH_isLittleEndian(void)
+{
+    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
+# endif
+#endif
+
+
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
+# define XXH_rotl32 __builtin_rotateleft32
+# define XXH_rotl64 __builtin_rotateleft64
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXH_swap32 (xxh_u32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+}
+
+static xxh_u32 XXH_readBE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+XXH_FORCE_INLINE xxh_u32
+XXH_readLE32_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned) {
+        return XXH_readLE32(ptr);
+    } else {
+        return XXH_CPU_LITTLE_ENDIAN ?
*(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
+    }
+}
+
+
+/* *************************************
+* Misc
+***************************************/
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+static const xxh_u32 PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
+static const xxh_u32 PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
+static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
+static const xxh_u32 PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
+static const xxh_u32 PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
+
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+    acc += input * PRIME32_2;
+    acc  = XXH_rotl32(acc, 13);
+    acc *= PRIME32_1;
+#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /* UGLY HACK:
+     * This inline assembly hack forces acc into a normal register. This is the
+     * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
+     * (pragmas and attributes don't work for some reason) without globally
+     * disabling SSE4.1.
+     *
+     * The reason we want to avoid vectorization is because despite working on
+     * 4 integers at a time, there are multiple factors slowing XXH32 down on
+     * SSE4:
+     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
+     *   making it slightly slower to multiply four integers at once compared to four
+     *   integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
+     *   still not worth it to go into SSE just to multiply unless doing a long operation.
+     *
+     * - Four instructions are required to rotate,
+     *      movdqa tmp, v   // not required with VEX encoding
+     *      pslld  tmp, 13  // tmp <<= 13
+     *      psrld  v, 19    // x >>= 19
+     *      por    v, tmp   // x |= tmp
+     *   compared to one for scalar:
+     *      roll   v, 13    // reliably fast across the board
+     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
+     *
+     * - Instruction level parallelism is actually more beneficial here because the
+     *   SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
+     *   while v3 can multiply. SSE forces them to operate together.
+     *
+     * How this hack works:
+     * __asm__(""       // Declare an assembly block but don't declare any instructions
+     *    :             // However, as an Input/Output Operand,
+     *    "+r"          // constrain a read/write operand (+) as a general purpose register (r).
+     *    (acc)         // and set acc as the operand
+     * );
+     *
+     * Because of the 'r', the compiler has promised that seed will be in a
+     * general purpose register and the '+' says that it will be 'read/write',
+     * so it has to assume it has changed. It is like volatile without all the
+     * loads and stores.
+     *
+     * Since the argument has to be in a normal register (not an SSE register),
+     * each time XXH32_round is called, it is impossible to vectorize.
*/ + __asm__("" : "+r" (acc)); +#endif + return acc; +} + +/* mix all bits */ +static xxh_u32 XXH32_avalanche(xxh_u32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, align) + +static xxh_u32 +XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define PROCESS1 \ + h32 += (*ptr++) * PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(ptr) * PRIME32_3; \ + ptr+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + /* Compact rerolled version */ + if (XXH_REROLL) { + len &= 15; + while (len >= 4) { + PROCESS4; + len -= 4; + } + while (len > 0) { + PROCESS1; + --len; + } + return XXH32_avalanche(h32); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: PROCESS4; + FALLTHROUGH; + case 8: PROCESS4; + FALLTHROUGH; + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: PROCESS4; + FALLTHROUGH; + case 9: PROCESS4; + FALLTHROUGH; + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + FALLTHROUGH; + case 10: PROCESS4; + FALLTHROUGH; + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + FALLTHROUGH; + case 11: PROCESS4; + FALLTHROUGH; + case 7: PROCESS4; + FALLTHROUGH; + case 3: PROCESS1; + FALLTHROUGH; + case 2: PROCESS1; + FALLTHROUGH; + case 1: PROCESS1; + FALLTHROUGH; + case 0: return XXH32_avalanche(h32); + } + XXH_ASSERT(0); + return h32; /* reaching this point is deemed impossible */ + } +} + +XXH_FORCE_INLINE xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)16; + } +#endif + + if (len>=16) { + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2; + xxh_u32 v2 = seed + PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + 
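+/* A minimal streaming-usage sketch (hypothetical caller code and chunk names):
+ *
+ *    XXH32_state_t* st = XXH32_createState();
+ *    XXH32_reset(st, 0);
+ *    XXH32_update(st, chunk1, len1);
+ *    XXH32_update(st, chunk2, len2);
+ *    XXH32_hash_t h = XXH32_digest(st);
+ *    XXH32_freeState(st);
+ *
+ * This produces the same hash as a single XXH32() call over the
+ * concatenated chunks.
+ */
+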
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const xxh_u8* const limit = bEnd - 16; + xxh_u32 v1 = state->v1; + xxh_u32 v2 = state->v2; + xxh_u32 v3 = state->v3; + xxh_u32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} + + +/******* Canonical representation *******/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems. 
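+*
+* A minimal sketch (hypothetical names):
+*    XXH32_canonical_t c;
+*    XXH32_canonicalFromHash(&c, h32);                  // c.digest[] is big-endian
+*    XXH32_hash_t back = XXH32_hashFromCanonical(&c);   // back == h32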
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + + +/*! XXH_REROLL_XXH64: + * Whether to reroll the XXH64_finalize() loop. + * + * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain + * on 64-bit hosts, as only one jump is required. + * + * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers, + * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes + * ridiculously large (the largest function in the binary on i386!), and rerolling it saves + * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better + * and is more likely to be inlined by the compiler. + * + * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */ +#ifndef XXH_REROLL_XXH64 +# if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \ + || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \ + || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \ + || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \ + || defined(__mips64__) || defined(__mips64)) /* mips64 */ \ + || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */ +# define XXH_REROLL_XXH64 1 +# else +# define XXH_REROLL_XXH64 0 +# endif +#endif /* !defined(XXH_REROLL_XXH64) */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    xxh_u64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+static xxh_u64 XXH_swap64 (xxh_u64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x <<  8) & 0x000000ff00000000ULL) |
+            ((x >>  8) & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+}
+
+static xxh_u64 XXH_readBE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return XXH_readLE64(ptr);
+    else
+        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
+}
+
+
+/******* xxh64 *******/
+
+static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
+static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
+static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
+static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
+static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    acc += input * PRIME64_2;
+    acc  = XXH_rotl64(acc, 31);
+    acc *= PRIME64_1;
+    return acc;
+}
+
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    val  = XXH64_round(0, val);
+    acc ^= val;
+    acc  = acc * PRIME64_1 + PRIME64_4;
+    return acc;
+}
+
+static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+static xxh_u64
+XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+#define PROCESS1_64              \
+    h64 ^= (*ptr++) * PRIME64_5; \
+    h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64                                        \
+    h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1;      \
+    ptr+=4;                                                \
+    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 {                                      \
+    xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
+    ptr+=8;                                                \
+    h64 ^= k1;                                             \
+    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;     \
+}
+
+    /* Rerolled version for 32-bit targets is faster and much smaller.
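+     * Editor's note: len is still the total input length at this point; every
+     * complete 32-byte stripe was already consumed by the caller, so len & 31
+     * (used by both branches below) is exactly the number of tail bytes at ptr.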
*/ + if (XXH_REROLL || XXH_REROLL_XXH64) { + len &= 31; + while (len >= 8) { + PROCESS8_64; + len -= 8; + } + if (len >= 4) { + PROCESS4_64; + len -= 4; + } + while (len > 0) { + PROCESS1_64; + --len; + } + return XXH64_avalanche(h64); + } else { + switch(len & 31) { + case 24: PROCESS8_64; + FALLTHROUGH; + case 16: PROCESS8_64; + FALLTHROUGH; + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + FALLTHROUGH; + case 20: PROCESS8_64; + FALLTHROUGH; + case 12: PROCESS8_64; + FALLTHROUGH; + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + FALLTHROUGH; + case 17: PROCESS8_64; + FALLTHROUGH; + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + FALLTHROUGH; + case 21: PROCESS8_64; + FALLTHROUGH; + case 13: PROCESS8_64; + FALLTHROUGH; + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + FALLTHROUGH; + case 18: PROCESS8_64; + FALLTHROUGH; + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + FALLTHROUGH; + case 22: PROCESS8_64; + FALLTHROUGH; + case 14: PROCESS8_64; + FALLTHROUGH; + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + FALLTHROUGH; + case 19: PROCESS8_64; + FALLTHROUGH; + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + FALLTHROUGH; + case 23: PROCESS8_64; + FALLTHROUGH; + case 15: PROCESS8_64; + FALLTHROUGH; + case 7: PROCESS4_64; + FALLTHROUGH; + case 3: PROCESS1_64; + FALLTHROUGH; + case 2: PROCESS1_64; + FALLTHROUGH; + case 1: PROCESS1_64; + FALLTHROUGH; + case 0: return XXH64_avalanche(h64); + } + } + /* impossible to reach */ + XXH_ASSERT(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +XXH_FORCE_INLINE xxh_u64 +XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)32; + } +#endif + + if (len>=32) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2; + xxh_u64 v2 = seed + PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; + v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; + v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; + v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; + } while (input<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (xxh_u64) len; + + return XXH64_finalize(h64, input, len, align); +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8*)input, len); + return XXH64_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8*)input, len, 
seed, XXH_aligned); + } } + + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + +#endif +} + +/******* Hash Streaming *******/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + /* do not write into reserved64, might be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64)); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH64_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = state->v1; + xxh_u64 v2 = state->v2; + xxh_u64 v3 = state->v3; + xxh_u64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + xxh_u64 const v1 = state->v1; + xxh_u64 const v2 = state->v2; + xxh_u64 const v3 = state->v3; + xxh_u64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} + + +/******* Canonical representation *******/ + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == 
sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + + + +/* ********************************************************************* +* XXH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + +/* #include "xxh3.h" */ + + +#endif /* XXH_NO_LONG_LONG */ + + +#endif /* XXH_IMPLEMENTATION */ + + +#if defined (__cplusplus) +} +#endif
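
Editor's note: for quick reference, a minimal sketch of how the streaming API
vendored above is meant to be used (illustrative only, not part of the patch;
read_chunk() is a hypothetical helper returning the number of bytes produced
and 0 at end of stream, and the include path assumes this file lands at
src/mesa/util/xxhash.h):

    #include "util/xxhash.h"

    static XXH64_hash_t hash_stream(XXH64_hash_t seed)
    {
        char buf[4096];
        size_t n;
        XXH64_hash_t h;
        XXH64_state_t* const st = XXH64_createState();  /* NULL on allocation failure */
        XXH64_reset(st, seed);
        while ((n = read_chunk(buf, sizeof(buf))) != 0)
            XXH64_update(st, buf, n);                   /* buffers tails < 32 bytes */
        h = XXH64_digest(st);                           /* can be called repeatedly */
        XXH64_freeState(st);
        return h;
    }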