diff --git a/src/mesa/meson.build b/src/mesa/meson.build
index 0d1f57a..fd767a7 100644
--- a/src/mesa/meson.build
+++ b/src/mesa/meson.build
@@ -5,8 +5,10 @@ inc_mesa = include_directories('.', 'compat', 'pipe', 'util')
 files_mesa = files(
   'util/bitscan.c',
+  'util/hash_table.c',
   'util/os_file.c',
   'util/os_misc.c',
+  'util/ralloc.c',
   'util/u_cpu_detect.c',
   'util/u_debug.c',
   'util/u_math.c',
diff --git a/src/mesa/util/compiler.h b/src/mesa/util/compiler.h
new file mode 100644
index 0000000..da602cf
--- /dev/null
+++ b/src/mesa/util/compiler.h
@@ -0,0 +1,89 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file compiler.h
+ * Compiler-related stuff.
+ */
+
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+
+#include <assert.h>
+
+#include "util/macros.h"
+
+#include "c99_compat.h" /* inline, __func__, etc. */
+
+
+/**
+ * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32.
+ * Do not use these unless absolutely necessary!
+ * Try to use a runtime test instead.
+ * For now, only used by some DRI hardware drivers for color/texel packing.
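+ *
+ * Illustrative round-trip, as a sketch only (host_value is a placeholder
+ * variable name, not part of this header):
+ *
+ *   uint32_t le = CPU_TO_LE32(host_value);
+ *   assert(LE32_TO_CPU(le) == host_value);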
+ */
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
+#if defined(__linux__)
+#include <byteswap.h>
+#define CPU_TO_LE32( x ) bswap_32( x )
+#elif defined(__APPLE__)
+#include <CoreFoundation/CFByteOrder.h>
+#define CPU_TO_LE32( x ) CFSwapInt32HostToLittle( x )
+#elif defined(__OpenBSD__)
+#include <endian.h>
+#define CPU_TO_LE32( x ) htole32( x )
+#else /*__linux__ */
+#include <sys/endian.h>
+#define CPU_TO_LE32( x ) bswap32( x )
+#endif /*__linux__*/
+#define MESA_BIG_ENDIAN 1
+#else
+#define CPU_TO_LE32( x ) ( x )
+#define MESA_LITTLE_ENDIAN 1
+#endif
+#define LE32_TO_CPU( x ) CPU_TO_LE32( x )
+
+
+
+#define IEEE_ONE 0x3f800000
+
+#ifndef __has_attribute
+# define __has_attribute(x) 0
+#endif
+
+#if __cplusplus >= 201703L || __STDC_VERSION__ > 201710L
+/* Standard C++17/C23 attribute */
+#define FALLTHROUGH [[fallthrough]]
+#elif __has_attribute(fallthrough)
+/* Non-standard but supported by at least gcc and clang */
+#define FALLTHROUGH __attribute__((fallthrough))
+#else
+#define FALLTHROUGH do { } while(0)
+#endif
+
+#endif /* COMPILER_H */
diff --git a/src/mesa/util/fast_urem_by_const.h b/src/mesa/util/fast_urem_by_const.h
new file mode 100644
index 0000000..beb253d
--- /dev/null
+++ b/src/mesa/util/fast_urem_by_const.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2010 Valve Software
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <assert.h>
+
+/*
+ * Code for fast 32-bit unsigned remainder, based off of "Faster Remainder by
+ * Direct Computation: Applications to Compilers and Software Libraries,"
+ * available at https://arxiv.org/pdf/1902.01961.pdf.
+ *
+ * util_fast_urem32(n, d, REMAINDER_MAGIC(d)) returns the same thing as
+ * n % d for any unsigned n and d, however it compiles down to only a few
+ * multiplications, so it should be faster than plain uint32_t modulo if the
+ * same divisor is used many times.
+ */
+
+#define REMAINDER_MAGIC(divisor) \
+   ((uint64_t) ~0ull / (divisor) + 1)
+
+/*
+ * Get bits 64-96 of a 32x64-bit multiply. If __int128_t is available, we use
+ * it, which usually compiles down to one instruction on 64-bit architectures.
+ * Otherwise on 32-bit architectures we usually get four instructions (one
+ * 32x32->64 multiply, one 32x32->32 multiply, and one 64-bit add).
+ */
+
+static inline uint32_t
+_mul32by64_hi(uint32_t a, uint64_t b)
+{
+#ifdef HAVE_UINT128
+   return ((__uint128_t) b * a) >> 64;
+#else
+   /*
+    * Let b = b0 + 2^32 * b1. Then a * b = a * b0 + 2^32 * a * b1. We would
+    * have to do a 96-bit addition to get the full result, except that only
+    * one term has non-zero lower 32 bits, which means that to get the high 32
+    * bits, we only have to add the high 64 bits of each term. Unfortunately,
+    * we have to do the 64-bit addition in case the low 32 bits overflow.
+    */
+   uint32_t b0 = (uint32_t) b;
+   uint32_t b1 = b >> 32;
+   return ((((uint64_t) a * b0) >> 32) + (uint64_t) a * b1) >> 32;
+#endif
+}
+
+static inline uint32_t
+util_fast_urem32(uint32_t n, uint32_t d, uint64_t magic)
+{
+   uint64_t lowbits = magic * n;
+   uint32_t result = _mul32by64_hi(d, lowbits);
+   assert(result == n % d);
+   return result;
+}
+
diff --git a/src/mesa/util/hash_table.c b/src/mesa/util/hash_table.c
new file mode 100644
index 0000000..1811ee7
--- /dev/null
+++ b/src/mesa/util/hash_table.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ * Copyright © 1988-2004 Keith Packard and Bart Massey.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors
+ * or their institutions shall not be used in advertising or
+ * otherwise to promote the sale, use or other dealings in this
+ * Software without prior written authorization from the
+ * authors.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/**
+ * Implements an open-addressing, linear-reprobing hash table.
+ *
+ * For more information, see:
+ *
+ * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "hash_table.h"
+#include "ralloc.h"
+#include "macros.h"
+#include "u_memory.h"
+#include "fast_urem_by_const.h"
+
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+
+/**
+ * Magic number that gets stored outside of the struct hash_table.
+ *
+ * The hash table needs a particular pointer to be the marker for a key that
+ * was deleted from the table, along with NULL for the "never allocated in the
+ * table" marker. Legacy GL allows any GLuint to be used as a GL object name,
+ * and we use a 1:1 mapping from GLuints to key pointers, so we need to be
+ * able to track a GLuint that happens to match the deleted key outside of
+ * struct hash_table. We tell the hash table to use "1" as the deleted key
+ * value, so that we test the deleted-key-in-the-table path as best we can.
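+ *
+ * A sketch of how such a GLuint name becomes a key (uint_key() is the
+ * helper defined just below; "name" and "obj" are illustrative):
+ *
+ *   _mesa_hash_table_set_deleted_key(ht, uint_key(DELETED_KEY_VALUE));
+ *   _mesa_hash_table_insert(ht, uint_key(name), obj);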
+ */ +#define DELETED_KEY_VALUE 1 + +static inline void * +uint_key(unsigned id) +{ + return (void *)(uintptr_t) id; +} + +static const uint32_t deleted_key_value; + +/** + * From Knuth -- a good choice for hash/rehash values is p, p-2 where + * p and p-2 are both prime. These tables are sized to have an extra 10% + * free to avoid exponential performance degradation as the hash table fills + */ +static const struct { + uint32_t max_entries, size, rehash; + uint64_t size_magic, rehash_magic; +} hash_sizes[] = { +#define ENTRY(max_entries, size, rehash) \ + { max_entries, size, rehash, \ + REMAINDER_MAGIC(size), REMAINDER_MAGIC(rehash) } + + ENTRY(2, 5, 3 ), + ENTRY(4, 7, 5 ), + ENTRY(8, 13, 11 ), + ENTRY(16, 19, 17 ), + ENTRY(32, 43, 41 ), + ENTRY(64, 73, 71 ), + ENTRY(128, 151, 149 ), + ENTRY(256, 283, 281 ), + ENTRY(512, 571, 569 ), + ENTRY(1024, 1153, 1151 ), + ENTRY(2048, 2269, 2267 ), + ENTRY(4096, 4519, 4517 ), + ENTRY(8192, 9013, 9011 ), + ENTRY(16384, 18043, 18041 ), + ENTRY(32768, 36109, 36107 ), + ENTRY(65536, 72091, 72089 ), + ENTRY(131072, 144409, 144407 ), + ENTRY(262144, 288361, 288359 ), + ENTRY(524288, 576883, 576881 ), + ENTRY(1048576, 1153459, 1153457 ), + ENTRY(2097152, 2307163, 2307161 ), + ENTRY(4194304, 4613893, 4613891 ), + ENTRY(8388608, 9227641, 9227639 ), + ENTRY(16777216, 18455029, 18455027 ), + ENTRY(33554432, 36911011, 36911009 ), + ENTRY(67108864, 73819861, 73819859 ), + ENTRY(134217728, 147639589, 147639587 ), + ENTRY(268435456, 295279081, 295279079 ), + ENTRY(536870912, 590559793, 590559791 ), + ENTRY(1073741824, 1181116273, 1181116271 ), + ENTRY(2147483648ul, 2362232233ul, 2362232231ul ) +}; + +ASSERTED static inline bool +key_pointer_is_reserved(const struct hash_table *ht, const void *key) +{ + return key == NULL || key == ht->deleted_key; +} + +static int +entry_is_free(const struct hash_entry *entry) +{ + return entry->key == NULL; +} + +static int +entry_is_deleted(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key == ht->deleted_key; +} + +static int +entry_is_present(const struct hash_table *ht, struct hash_entry *entry) +{ + return entry->key != NULL && entry->key != ht->deleted_key; +} + +bool +_mesa_hash_table_init(struct hash_table *ht, + void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + ht->size_index = 0; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->key_hash_function = key_hash_function; + ht->key_equals_function = key_equals_function; + ht->table = rzalloc_array(mem_ctx, struct hash_entry, ht->size); + ht->entries = 0; + ht->deleted_entries = 0; + ht->deleted_key = &deleted_key_value; + + return ht->table != NULL; +} + +struct hash_table * +_mesa_hash_table_create(void *mem_ctx, + uint32_t (*key_hash_function)(const void *key), + bool (*key_equals_function)(const void *a, + const void *b)) +{ + struct hash_table *ht; + + /* mem_ctx is used to allocate the hash table, but the hash table is used + * to allocate all of the suballocations. 
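+    *
+    * A lifetime sketch (illustrative; passing NULL makes the table its own
+    * root context):
+    *
+    *   struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_string,
+    *                                                   _mesa_key_string_equal);
+    *   ...
+    *   _mesa_hash_table_destroy(ht, NULL);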
+ */ + ht = ralloc(mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + if (!_mesa_hash_table_init(ht, ht, key_hash_function, key_equals_function)) { + ralloc_free(ht); + return NULL; + } + + return ht; +} + +static uint32_t +key_u32_hash(const void *key) +{ + uint32_t u = (uint32_t)(uintptr_t)key; + return _mesa_hash_uint(&u); +} + +static bool +key_u32_equals(const void *a, const void *b) +{ + return (uint32_t)(uintptr_t)a == (uint32_t)(uintptr_t)b; +} + +/* key == 0 and key == deleted_key are not allowed */ +struct hash_table * +_mesa_hash_table_create_u32_keys(void *mem_ctx) +{ + return _mesa_hash_table_create(mem_ctx, key_u32_hash, key_u32_equals); +} + +struct hash_table * +_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx) +{ + struct hash_table *ht; + + ht = ralloc(dst_mem_ctx, struct hash_table); + if (ht == NULL) + return NULL; + + memcpy(ht, src, sizeof(struct hash_table)); + + ht->table = ralloc_array(ht, struct hash_entry, ht->size); + if (ht->table == NULL) { + ralloc_free(ht); + return NULL; + } + + memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry)); + + return ht; +} + +/** + * Frees the given hash table. + * + * If delete_function is passed, it gets called on each entry present before + * freeing. + */ +void +_mesa_hash_table_destroy(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + if (delete_function) { + hash_table_foreach(ht, entry) { + delete_function(entry); + } + } + ralloc_free(ht); +} + +static void +hash_table_clear_fast(struct hash_table *ht) +{ + memset(ht->table, 0, sizeof(struct hash_entry) * hash_sizes[ht->size_index].size); + ht->entries = ht->deleted_entries = 0; +} + +/** + * Deletes all entries of the given hash table without deleting the table + * itself or changing its structure. + * + * If delete_function is passed, it gets called on each entry present. + */ +void +_mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + if (!ht) + return; + + struct hash_entry *entry; + + if (delete_function) { + for (entry = ht->table; entry != ht->table + ht->size; entry++) { + if (entry_is_present(ht, entry)) + delete_function(entry); + + entry->key = NULL; + } + ht->entries = 0; + ht->deleted_entries = 0; + } else + hash_table_clear_fast(ht); +} + +/** Sets the value of the key pointer used for deleted entries in the table. + * + * The assumption is that usually keys are actual pointers, so we use a + * default value of a pointer to an arbitrary piece of storage in the library. + * But in some cases a consumer wants to store some other sort of value in the + * table, like a uint32_t, in which case that pointer may conflict with one of + * their valid keys. This lets that user select a safe value. + * + * This must be called before any keys are actually deleted from the table. 
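+ *
+ * For example (a sketch; the sentinel only needs a stable address that is
+ * never used as a real key):
+ *
+ *   static const char deleted_sentinel;
+ *   _mesa_hash_table_set_deleted_key(ht, &deleted_sentinel);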
+ */ +void +_mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key) +{ + ht->deleted_key = deleted_key; +} + +static struct hash_entry * +hash_table_search(struct hash_table *ht, uint32_t hash, const void *key) +{ + assert(!key_pointer_is_reserved(ht, key)); + + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + + do { + struct hash_entry *entry = ht->table + hash_address; + + if (entry_is_free(entry)) { + return NULL; + } else if (entry_is_present(ht, entry) && entry->hash == hash) { + if (ht->key_equals_function(key, entry->key)) { + return entry; + } + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (hash_address != start_hash_address); + + return NULL; +} + +/** + * Finds a hash table entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. + */ +struct hash_entry * +_mesa_hash_table_search(struct hash_table *ht, const void *key) +{ + assert(ht->key_hash_function); + return hash_table_search(ht, ht->key_hash_function(key), key); +} + +struct hash_entry * +_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key) +{ + assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key)); + return hash_table_search(ht, hash, key); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data); + +static void +hash_table_insert_rehash(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + uint32_t size = ht->size; + uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic); + uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash, + ht->rehash_magic); + uint32_t hash_address = start_hash_address; + do { + struct hash_entry *entry = ht->table + hash_address; + + if (likely(entry->key == NULL)) { + entry->hash = hash; + entry->key = key; + entry->data = data; + return; + } + + hash_address += double_hash; + if (hash_address >= size) + hash_address -= size; + } while (true); +} + +static void +_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index) +{ + struct hash_table old_ht; + struct hash_entry *table; + + if (ht->size_index == new_size_index && ht->deleted_entries == ht->max_entries) { + hash_table_clear_fast(ht); + assert(!ht->entries); + return; + } + + if (new_size_index >= ARRAY_SIZE(hash_sizes)) + return; + + table = rzalloc_array(ralloc_parent(ht->table), struct hash_entry, + hash_sizes[new_size_index].size); + if (table == NULL) + return; + + old_ht = *ht; + + ht->table = table; + ht->size_index = new_size_index; + ht->size = hash_sizes[ht->size_index].size; + ht->rehash = hash_sizes[ht->size_index].rehash; + ht->size_magic = hash_sizes[ht->size_index].size_magic; + ht->rehash_magic = hash_sizes[ht->size_index].rehash_magic; + ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->entries = 0; + ht->deleted_entries = 0; + + hash_table_foreach(&old_ht, entry) { + hash_table_insert_rehash(ht, entry->hash, entry->key, entry->data); + } + + ht->entries = old_ht.entries; + + ralloc_free(old_ht.table); +} + +static struct hash_entry * +hash_table_insert(struct hash_table *ht, uint32_t hash, + const void *key, void *data) +{ + struct hash_entry *available_entry = NULL; + + 
+   assert(!key_pointer_is_reserved(ht, key));
+
+   if (ht->entries >= ht->max_entries) {
+      _mesa_hash_table_rehash(ht, ht->size_index + 1);
+   } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
+      _mesa_hash_table_rehash(ht, ht->size_index);
+   }
+
+   uint32_t size = ht->size;
+   uint32_t start_hash_address = util_fast_urem32(hash, size, ht->size_magic);
+   uint32_t double_hash = 1 + util_fast_urem32(hash, ht->rehash,
+                                               ht->rehash_magic);
+   uint32_t hash_address = start_hash_address;
+   do {
+      struct hash_entry *entry = ht->table + hash_address;
+
+      if (!entry_is_present(ht, entry)) {
+         /* Stash the first available entry we find */
+         if (available_entry == NULL)
+            available_entry = entry;
+         if (entry_is_free(entry))
+            break;
+      }
+
+      /* Implement replacement when another insert happens
+       * with a matching key. This is a relatively common
+       * feature of hash tables, with the alternative
+       * generally being "insert the new value as well, and
+       * return it first when the key is searched for".
+       *
+       * Note that the hash table doesn't have a delete
+       * callback. If freeing of old data pointers is
+       * required to avoid memory leaks, perform a search
+       * before inserting.
+       */
+      if (!entry_is_deleted(ht, entry) &&
+          entry->hash == hash &&
+          ht->key_equals_function(key, entry->key)) {
+         entry->key = key;
+         entry->data = data;
+         return entry;
+      }
+
+      hash_address += double_hash;
+      if (hash_address >= size)
+         hash_address -= size;
+   } while (hash_address != start_hash_address);
+
+   if (available_entry) {
+      if (entry_is_deleted(ht, available_entry))
+         ht->deleted_entries--;
+      available_entry->hash = hash;
+      available_entry->key = key;
+      available_entry->data = data;
+      ht->entries++;
+      return available_entry;
+   }
+
+   /* We could hit here if a required resize failed. An unchecked-malloc
+    * application could ignore this result.
+    */
+   return NULL;
+}
+
+/**
+ * Inserts the key with the given hash into the table.
+ *
+ * Note that insertion may rearrange the table on a resize or rehash,
+ * so previously found hash_entries are no longer valid after this function.
+ */
+struct hash_entry *
+_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data)
+{
+   assert(ht->key_hash_function);
+   return hash_table_insert(ht, ht->key_hash_function(key), key, data);
+}
+
+struct hash_entry *
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data)
+{
+   assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key));
+   return hash_table_insert(ht, hash, key, data);
+}
+
+/**
+ * This function deletes the given hash table entry.
+ *
+ * Note that deletion doesn't otherwise modify the table, so an iteration over
+ * the table deleting entries is safe.
+ */
+void
+_mesa_hash_table_remove(struct hash_table *ht,
+                        struct hash_entry *entry)
+{
+   if (!entry)
+      return;
+
+   entry->key = ht->deleted_key;
+   ht->entries--;
+   ht->deleted_entries++;
+}
+
+/**
+ * Removes the entry with the corresponding key, if it exists.
+ */
+void _mesa_hash_table_remove_key(struct hash_table *ht,
+                                 const void *key)
+{
+   _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
+}
+
+/**
+ * This function is an iterator over the hash_table when no deleted entries are present.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop.
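+ *
+ * Loop shape, as a sketch (do_something is a placeholder; the
+ * hash_table_foreach_remove macro in hash_table.h wraps this pattern):
+ *
+ *   for (struct hash_entry *e = _mesa_hash_table_next_entry_unsafe(ht, NULL);
+ *        e != NULL; e = _mesa_hash_table_next_entry_unsafe(ht, e))
+ *      do_something(e->data);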
+ */
+struct hash_entry *
+_mesa_hash_table_next_entry_unsafe(const struct hash_table *ht, struct hash_entry *entry)
+{
+   assert(!ht->deleted_entries);
+   if (!ht->entries)
+      return NULL;
+   if (entry == NULL)
+      entry = ht->table;
+   else
+      entry = entry + 1;
+   if (entry != ht->table + ht->size)
+      return entry->key ? entry : _mesa_hash_table_next_entry_unsafe(ht, entry);
+
+   return NULL;
+}
+
+/**
+ * This function is an iterator over the hash table.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop. Note that
+ * an iteration over the table is O(table_size) not O(entries).
+ */
+struct hash_entry *
+_mesa_hash_table_next_entry(struct hash_table *ht,
+                            struct hash_entry *entry)
+{
+   if (entry == NULL)
+      entry = ht->table;
+   else
+      entry = entry + 1;
+
+   for (; entry != ht->table + ht->size; entry++) {
+      if (entry_is_present(ht, entry)) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
+
+/**
+ * Returns a random entry from the hash table.
+ *
+ * This may be useful in implementing random replacement (as opposed
+ * to just removing everything) in caches based on this hash table
+ * implementation. @predicate may be used to filter entries, or may
+ * be set to NULL for no filtering.
+ */
+struct hash_entry *
+_mesa_hash_table_random_entry(struct hash_table *ht,
+                              bool (*predicate)(struct hash_entry *entry))
+{
+   struct hash_entry *entry;
+   uint32_t i = rand() % ht->size;
+
+   if (ht->entries == 0)
+      return NULL;
+
+   for (entry = ht->table + i; entry != ht->table + ht->size; entry++) {
+      if (entry_is_present(ht, entry) &&
+          (!predicate || predicate(entry))) {
+         return entry;
+      }
+   }
+
+   for (entry = ht->table; entry != ht->table + i; entry++) {
+      if (entry_is_present(ht, entry) &&
+          (!predicate || predicate(entry))) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
+
+
+uint32_t
+_mesa_hash_data(const void *data, size_t size)
+{
+   return XXH32(data, size, 0);
+}
+
+uint32_t
+_mesa_hash_data_with_seed(const void *data, size_t size, uint32_t seed)
+{
+   return XXH32(data, size, seed);
+}
+
+uint32_t
+_mesa_hash_int(const void *key)
+{
+   return XXH32(key, sizeof(int), 0);
+}
+
+uint32_t
+_mesa_hash_uint(const void *key)
+{
+   return XXH32(key, sizeof(unsigned), 0);
+}
+
+uint32_t
+_mesa_hash_u32(const void *key)
+{
+   return XXH32(key, 4, 0);
+}
+
+/** String hash (xxhash-based; XXH64 truncated to 32 bits on 64-bit targets) */
+uint32_t
+_mesa_hash_string(const void *_key)
+{
+   uint32_t hash = 0;
+   const char *key = _key;
+   size_t len = strlen(key);
+#if defined(_WIN64) || defined(__x86_64__)
+   hash = (uint32_t)XXH64(key, len, hash);
+#else
+   hash = XXH32(key, len, hash);
+#endif
+   return hash;
+}
+
+uint32_t
+_mesa_hash_pointer(const void *pointer)
+{
+   uintptr_t num = (uintptr_t) pointer;
+   return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14));
+}
+
+bool
+_mesa_key_int_equal(const void *a, const void *b)
+{
+   return *((const int *)a) == *((const int *)b);
+}
+
+bool
+_mesa_key_uint_equal(const void *a, const void *b)
+{
+   return *((const unsigned *)a) == *((const unsigned *)b);
+}
+
+bool
+_mesa_key_u32_equal(const void *a, const void *b)
+{
+   return *((const uint32_t *)a) == *((const uint32_t *)b);
+}
+
+/**
+ * String compare function for use as the comparison callback in
+ * _mesa_hash_table_create().
+ */
+bool
+_mesa_key_string_equal(const void *a, const void *b)
+{
+   return strcmp(a, b) == 0;
+}
+
+bool
+_mesa_key_pointer_equal(const void *a, const void *b)
+{
+   return a == b;
+}
+
+/**
+ * Helper to create a hash table with pointer keys.
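+ *
+ * Usage sketch (key and val stand for arbitrary pointers with stable
+ * identity; illustrative only):
+ *
+ *   struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);
+ *   _mesa_hash_table_insert(ht, key, val);
+ *   struct hash_entry *e = _mesa_hash_table_search(ht, key);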
+ */ +struct hash_table * +_mesa_pointer_hash_table_create(void *mem_ctx) +{ + return _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + + +bool +_mesa_hash_table_reserve(struct hash_table *ht, unsigned size) +{ + if (size < ht->max_entries) + return true; + for (unsigned i = ht->size_index + 1; i < ARRAY_SIZE(hash_sizes); i++) { + if (hash_sizes[i].max_entries >= size) { + _mesa_hash_table_rehash(ht, i); + break; + } + } + return ht->max_entries >= size; +} + +/** + * Hash table wrapper which supports 64-bit keys. + * + * TODO: unify all hash table implementations. + */ + +struct hash_key_u64 { + uint64_t value; +}; + +static uint32_t +key_u64_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct hash_key_u64)); +} + +static bool +key_u64_equals(const void *a, const void *b) +{ + const struct hash_key_u64 *aa = a; + const struct hash_key_u64 *bb = b; + + return aa->value == bb->value; +} + +#define FREED_KEY_VALUE 0 + +struct hash_table_u64 * +_mesa_hash_table_u64_create(void *mem_ctx) +{ + STATIC_ASSERT(FREED_KEY_VALUE != DELETED_KEY_VALUE); + struct hash_table_u64 *ht; + + ht = CALLOC_STRUCT(hash_table_u64); + if (!ht) + return NULL; + + if (sizeof(void *) == 8) { + ht->table = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } else { + ht->table = _mesa_hash_table_create(mem_ctx, key_u64_hash, + key_u64_equals); + } + + if (ht->table) + _mesa_hash_table_set_deleted_key(ht->table, uint_key(DELETED_KEY_VALUE)); + + return ht; +} + +static void +_mesa_hash_table_u64_delete_key(struct hash_entry *entry) +{ + if (sizeof(void *) == 8) + return; + + struct hash_key_u64 *_key = (struct hash_key_u64 *)entry->key; + + if (_key) + free(_key); +} + +void +_mesa_hash_table_u64_clear(struct hash_table_u64 *ht) +{ + if (!ht) + return; + + _mesa_hash_table_clear(ht->table, _mesa_hash_table_u64_delete_key); +} + +void +_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht) +{ + if (!ht) + return; + + _mesa_hash_table_u64_clear(ht); + _mesa_hash_table_destroy(ht->table, NULL); + free(ht); +} + +void +_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key, + void *data) +{ + if (key == FREED_KEY_VALUE) { + ht->freed_key_data = data; + return; + } + + if (key == DELETED_KEY_VALUE) { + ht->deleted_key_data = data; + return; + } + + if (sizeof(void *) == 8) { + _mesa_hash_table_insert(ht->table, (void *)(uintptr_t)key, data); + } else { + struct hash_key_u64 *_key = CALLOC_STRUCT(hash_key_u64); + + if (!_key) + return; + _key->value = key; + + _mesa_hash_table_insert(ht->table, _key, data); + } +} + +static struct hash_entry * +hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key) +{ + if (sizeof(void *) == 8) { + return _mesa_hash_table_search(ht->table, (void *)(uintptr_t)key); + } else { + struct hash_key_u64 _key = { .value = key }; + return _mesa_hash_table_search(ht->table, &_key); + } +} + +void * +_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key) +{ + struct hash_entry *entry; + + if (key == FREED_KEY_VALUE) + return ht->freed_key_data; + + if (key == DELETED_KEY_VALUE) + return ht->deleted_key_data; + + entry = hash_table_u64_search(ht, key); + if (!entry) + return NULL; + + return entry->data; +} + +void +_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key) +{ + struct hash_entry *entry; + + if (key == FREED_KEY_VALUE) { + ht->freed_key_data = NULL; + return; + } + + if (key == DELETED_KEY_VALUE) { + ht->deleted_key_data = NULL; + return; + } + + entry = 
hash_table_u64_search(ht, key);
+   if (!entry)
+      return;
+
+   if (sizeof(void *) == 8) {
+      _mesa_hash_table_remove(ht->table, entry);
+   } else {
+      struct hash_key_u64 *_key = (struct hash_key_u64 *)entry->key;
+
+      _mesa_hash_table_remove(ht->table, entry);
+      free(_key);
+   }
+}
diff --git a/src/mesa/util/hash_table.h b/src/mesa/util/hash_table.h
new file mode 100644
index 0000000..8079d10
--- /dev/null
+++ b/src/mesa/util/hash_table.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifndef _HASH_TABLE_H
+#define _HASH_TABLE_H
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include "c99_compat.h"
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hash_entry {
+   uint32_t hash;
+   const void *key;
+   void *data;
+};
+
+struct hash_table {
+   struct hash_entry *table;
+   uint32_t (*key_hash_function)(const void *key);
+   bool (*key_equals_function)(const void *a, const void *b);
+   const void *deleted_key;
+   uint32_t size;
+   uint32_t rehash;
+   uint64_t size_magic;
+   uint64_t rehash_magic;
+   uint32_t max_entries;
+   uint32_t size_index;
+   uint32_t entries;
+   uint32_t deleted_entries;
+};
+
+struct hash_table *
+_mesa_hash_table_create(void *mem_ctx,
+                        uint32_t (*key_hash_function)(const void *key),
+                        bool (*key_equals_function)(const void *a,
+                                                    const void *b));
+
+bool
+_mesa_hash_table_init(struct hash_table *ht,
+                      void *mem_ctx,
+                      uint32_t (*key_hash_function)(const void *key),
+                      bool (*key_equals_function)(const void *a,
+                                                  const void *b));
+
+struct hash_table *
+_mesa_hash_table_create_u32_keys(void *mem_ctx);
+
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
+void _mesa_hash_table_destroy(struct hash_table *ht,
+                              void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_clear(struct hash_table *ht,
+                            void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
+                                      const void *deleted_key);
+
+static inline uint32_t _mesa_hash_table_num_entries(struct hash_table *ht)
+{
+   return ht->entries;
+}
+
+struct hash_entry *
+_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
+struct hash_entry *
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data);
+struct hash_entry *
+_mesa_hash_table_search(struct hash_table *ht, const void *key);
+struct hash_entry *
+_mesa_hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash, + const void *key); +void _mesa_hash_table_remove(struct hash_table *ht, + struct hash_entry *entry); +void _mesa_hash_table_remove_key(struct hash_table *ht, + const void *key); + +struct hash_entry *_mesa_hash_table_next_entry(struct hash_table *ht, + struct hash_entry *entry); +struct hash_entry *_mesa_hash_table_next_entry_unsafe(const struct hash_table *ht, + struct hash_entry *entry); +struct hash_entry * +_mesa_hash_table_random_entry(struct hash_table *ht, + bool (*predicate)(struct hash_entry *entry)); + +uint32_t _mesa_hash_data(const void *data, size_t size); +uint32_t _mesa_hash_data_with_seed(const void *data, size_t size, uint32_t seed); + +uint32_t _mesa_hash_int(const void *key); +uint32_t _mesa_hash_uint(const void *key); +uint32_t _mesa_hash_u32(const void *key); +uint32_t _mesa_hash_string(const void *key); +uint32_t _mesa_hash_pointer(const void *pointer); + +bool _mesa_key_int_equal(const void *a, const void *b); +bool _mesa_key_uint_equal(const void *a, const void *b); +bool _mesa_key_u32_equal(const void *a, const void *b); +bool _mesa_key_string_equal(const void *a, const void *b); +bool _mesa_key_pointer_equal(const void *a, const void *b); + +struct hash_table * +_mesa_pointer_hash_table_create(void *mem_ctx); + +bool +_mesa_hash_table_reserve(struct hash_table *ht, unsigned size); +/** + * This foreach function is safe against deletion (which just replaces + * an entry's data with the deleted marker), but not against insertion + * (which may rehash the table, making entry a dangling pointer). + */ +#define hash_table_foreach(ht, entry) \ + for (struct hash_entry *entry = _mesa_hash_table_next_entry(ht, NULL); \ + entry != NULL; \ + entry = _mesa_hash_table_next_entry(ht, entry)) +/** + * This foreach function destroys the table as it iterates. + * It is not safe to use when inserting or removing entries. + */ +#define hash_table_foreach_remove(ht, entry) \ + for (struct hash_entry *entry = _mesa_hash_table_next_entry_unsafe(ht, NULL); \ + (ht)->entries; \ + entry->hash = 0, entry->key = (void*)NULL, entry->data = NULL, \ + (ht)->entries--, entry = _mesa_hash_table_next_entry_unsafe(ht, entry)) + +static inline void +hash_table_call_foreach(struct hash_table *ht, + void (*callback)(const void *key, + void *data, + void *closure), + void *closure) +{ + hash_table_foreach(ht, entry) + callback(entry->key, entry->data, closure); +} + +/** + * Hash table wrapper which supports 64-bit keys. 
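+ *
+ * Usage sketch (the value 42 is illustrative; keys 0 and 1 are handled
+ * specially, see hash_table.c):
+ *
+ *   struct hash_table_u64 *ht = _mesa_hash_table_u64_create(NULL);
+ *   _mesa_hash_table_u64_insert(ht, 42, data);
+ *   void *found = _mesa_hash_table_u64_search(ht, 42);
+ *   _mesa_hash_table_u64_destroy(ht);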
+ */
+struct hash_table_u64 {
+   struct hash_table *table;
+   void *freed_key_data;
+   void *deleted_key_data;
+};
+
+struct hash_table_u64 *
+_mesa_hash_table_u64_create(void *mem_ctx);
+
+void
+_mesa_hash_table_u64_destroy(struct hash_table_u64 *ht);
+
+void
+_mesa_hash_table_u64_insert(struct hash_table_u64 *ht, uint64_t key,
+                            void *data);
+
+void *
+_mesa_hash_table_u64_search(struct hash_table_u64 *ht, uint64_t key);
+
+void
+_mesa_hash_table_u64_remove(struct hash_table_u64 *ht, uint64_t key);
+
+void
+_mesa_hash_table_u64_clear(struct hash_table_u64 *ht);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _HASH_TABLE_H */
diff --git a/src/mesa/util/ralloc.c b/src/mesa/util/ralloc.c
new file mode 100644
index 0000000..4c2cf07
--- /dev/null
+++ b/src/mesa/util/ralloc.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+#include "util/u_math.h"
+
+/* Some versions of MinGW are missing _vscprintf's declaration, although they
+ * still provide the symbol in the import library. */
+#ifdef __MINGW32__
+_CRTIMP int _vscprintf(const char *format, va_list argptr);
+#endif
+
+#include "ralloc.h"
+
+#ifndef va_copy
+#ifdef __va_copy
+#define va_copy(dest, src) __va_copy((dest), (src))
+#else
+#define va_copy(dest, src) (dest) = (src)
+#endif
+#endif
+
+#define CANARY 0x5A1106
+
+/* Align the header's size so that ralloc() allocations will return with the
+ * same alignment as a libc malloc would have (8 on 32-bit GLIBC, 16 on
+ * 64-bit), avoiding performance penalties on x86 and alignment faults on
+ * ARM.
+ */
+struct
+#ifdef _MSC_VER
+#if _WIN64
+__declspec(align(16))
+#else
+ __declspec(align(8))
+#endif
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
+#else
+ __attribute__((aligned(8)))
+#endif
+   ralloc_header
+{
+#ifndef NDEBUG
+   /* A canary value used to determine whether a pointer is ralloc'd.
*/ + unsigned canary; +#endif + + struct ralloc_header *parent; + + /* The first child (head of a linked list) */ + struct ralloc_header *child; + + /* Linked list of siblings */ + struct ralloc_header *prev; + struct ralloc_header *next; + + void (*destructor)(void *); +}; + +typedef struct ralloc_header ralloc_header; + +static void unlink_block(ralloc_header *info); +static void unsafe_free(ralloc_header *info); + +static ralloc_header * +get_header(const void *ptr) +{ + ralloc_header *info = (ralloc_header *) (((char *) ptr) - + sizeof(ralloc_header)); + assert(info->canary == CANARY); + return info; +} + +#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header)) + +static void +add_child(ralloc_header *parent, ralloc_header *info) +{ + if (parent != NULL) { + info->parent = parent; + info->next = parent->child; + parent->child = info; + + if (info->next != NULL) + info->next->prev = info; + } +} + +void * +ralloc_context(const void *ctx) +{ + return ralloc_size(ctx, 0); +} + +void * +ralloc_size(const void *ctx, size_t size) +{ + /* Some malloc allocation doesn't always align to 16 bytes even on 64 bits + * system, from Android bionic/tests/malloc_test.cpp: + * - Allocations of a size that rounds up to a multiple of 16 bytes + * must have at least 16 byte alignment. + * - Allocations of a size that rounds up to a multiple of 8 bytes and + * not 16 bytes, are only required to have at least 8 byte alignment. + */ + void *block = malloc(align64(size + sizeof(ralloc_header), + alignof(ralloc_header))); + ralloc_header *info; + ralloc_header *parent; + + if (unlikely(block == NULL)) + return NULL; + + info = (ralloc_header *) block; + /* measurements have shown that calloc is slower (because of + * the multiplication overflow checking?), so clear things + * manually + */ + info->parent = NULL; + info->child = NULL; + info->prev = NULL; + info->next = NULL; + info->destructor = NULL; + + parent = ctx != NULL ? get_header(ctx) : NULL; + + add_child(parent, info); + +#ifndef NDEBUG + info->canary = CANARY; +#endif + + return PTR_FROM_HEADER(info); +} + +void * +rzalloc_size(const void *ctx, size_t size) +{ + void *ptr = ralloc_size(ctx, size); + + if (likely(ptr)) + memset(ptr, 0, size); + + return ptr; +} + +/* helper function - assumes ptr != NULL */ +static void * +resize(void *ptr, size_t size) +{ + ralloc_header *child, *old, *info; + + old = get_header(ptr); + info = realloc(old, align64(size + sizeof(ralloc_header), + alignof(ralloc_header))); + + if (info == NULL) + return NULL; + + /* Update parent and sibling's links to the reallocated node. 
*/ + if (info != old && info->parent != NULL) { + if (info->parent->child == old) + info->parent->child = info; + + if (info->prev != NULL) + info->prev->next = info; + + if (info->next != NULL) + info->next->prev = info; + } + + /* Update child->parent links for all children */ + for (child = info->child; child != NULL; child = child->next) + child->parent = info; + + return PTR_FROM_HEADER(info); +} + +void * +reralloc_size(const void *ctx, void *ptr, size_t size) +{ + if (unlikely(ptr == NULL)) + return ralloc_size(ctx, size); + + assert(ralloc_parent(ptr) == ctx); + return resize(ptr, size); +} + +void * +rerzalloc_size(const void *ctx, void *ptr, size_t old_size, size_t new_size) +{ + if (unlikely(ptr == NULL)) + return rzalloc_size(ctx, new_size); + + assert(ralloc_parent(ptr) == ctx); + ptr = resize(ptr, new_size); + + if (new_size > old_size) + memset((char *)ptr + old_size, 0, new_size - old_size); + + return ptr; +} + +void * +ralloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return ralloc_size(ctx, size * count); +} + +void * +rzalloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return rzalloc_size(ctx, size * count); +} + +void * +reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return reralloc_size(ctx, ptr, size * count); +} + +void * +rerzalloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned old_count, unsigned new_count) +{ + if (new_count > SIZE_MAX/size) + return NULL; + + return rerzalloc_size(ctx, ptr, size * old_count, size * new_count); +} + +void +ralloc_free(void *ptr) +{ + ralloc_header *info; + + if (ptr == NULL) + return; + + info = get_header(ptr); + unlink_block(info); + unsafe_free(info); +} + +static void +unlink_block(ralloc_header *info) +{ + /* Unlink from parent & siblings */ + if (info->parent != NULL) { + if (info->parent->child == info) + info->parent->child = info->next; + + if (info->prev != NULL) + info->prev->next = info->next; + + if (info->next != NULL) + info->next->prev = info->prev; + } + info->parent = NULL; + info->prev = NULL; + info->next = NULL; +} + +static void +unsafe_free(ralloc_header *info) +{ + /* Recursively free any children...don't waste time unlinking them. */ + ralloc_header *temp; + while (info->child != NULL) { + temp = info->child; + info->child = temp->next; + unsafe_free(temp); + } + + /* Free the block itself. Call the destructor first, if any. */ + if (info->destructor != NULL) + info->destructor(PTR_FROM_HEADER(info)); + + free(info); +} + +void +ralloc_steal(const void *new_ctx, void *ptr) +{ + ralloc_header *info, *parent; + + if (unlikely(ptr == NULL)) + return; + + info = get_header(ptr); + parent = new_ctx ? get_header(new_ctx) : NULL; + + unlink_block(info); + + add_child(parent, info); +} + +void +ralloc_adopt(const void *new_ctx, void *old_ctx) +{ + ralloc_header *new_info, *old_info, *child; + + if (unlikely(old_ctx == NULL)) + return; + + old_info = get_header(old_ctx); + new_info = get_header(new_ctx); + + /* If there are no children, bail. */ + if (unlikely(old_info->child == NULL)) + return; + + /* Set all the children's parent to new_ctx; get a pointer to the last child. 
+    */
+   for (child = old_info->child; child->next != NULL; child = child->next) {
+      child->parent = new_info;
+   }
+   child->parent = new_info;
+
+   /* Connect the two lists together; parent them to new_ctx; make old_ctx empty. */
+   child->next = new_info->child;
+   if (child->next)
+      child->next->prev = child;
+   new_info->child = old_info->child;
+   old_info->child = NULL;
+}
+
+void *
+ralloc_parent(const void *ptr)
+{
+   ralloc_header *info;
+
+   if (unlikely(ptr == NULL))
+      return NULL;
+
+   info = get_header(ptr);
+   return info->parent ? PTR_FROM_HEADER(info->parent) : NULL;
+}
+
+void
+ralloc_set_destructor(const void *ptr, void(*destructor)(void *))
+{
+   ralloc_header *info = get_header(ptr);
+   info->destructor = destructor;
+}
+
+char *
+ralloc_strdup(const void *ctx, const char *str)
+{
+   size_t n;
+   char *ptr;
+
+   if (unlikely(str == NULL))
+      return NULL;
+
+   n = strlen(str);
+   ptr = ralloc_array(ctx, char, n + 1);
+   memcpy(ptr, str, n);
+   ptr[n] = '\0';
+   return ptr;
+}
+
+char *
+ralloc_strndup(const void *ctx, const char *str, size_t max)
+{
+   size_t n;
+   char *ptr;
+
+   if (unlikely(str == NULL))
+      return NULL;
+
+   n = strnlen(str, max);
+   ptr = ralloc_array(ctx, char, n + 1);
+   memcpy(ptr, str, n);
+   ptr[n] = '\0';
+   return ptr;
+}
+
+/* helper routine for strcat/strncat - n is the exact amount to copy */
+static bool
+cat(char **dest, const char *str, size_t n)
+{
+   char *both;
+   size_t existing_length;
+   assert(dest != NULL && *dest != NULL);
+
+   existing_length = strlen(*dest);
+   both = resize(*dest, existing_length + n + 1);
+   if (unlikely(both == NULL))
+      return false;
+
+   memcpy(both + existing_length, str, n);
+   both[existing_length + n] = '\0';
+
+   *dest = both;
+   return true;
+}
+
+
+bool
+ralloc_strcat(char **dest, const char *str)
+{
+   return cat(dest, str, strlen(str));
+}
+
+bool
+ralloc_strncat(char **dest, const char *str, size_t n)
+{
+   return cat(dest, str, strnlen(str, n));
+}
+
+bool
+ralloc_str_append(char **dest, const char *str,
+                  size_t existing_length, size_t str_size)
+{
+   char *both;
+   assert(dest != NULL && *dest != NULL);
+
+   both = resize(*dest, existing_length + str_size + 1);
+   if (unlikely(both == NULL))
+      return false;
+
+   memcpy(both + existing_length, str, str_size);
+   both[existing_length + str_size] = '\0';
+
+   *dest = both;
+
+   return true;
+}
+
+char *
+ralloc_asprintf(const void *ctx, const char *fmt, ...)
+{
+   char *ptr;
+   va_list args;
+   va_start(args, fmt);
+   ptr = ralloc_vasprintf(ctx, fmt, args);
+   va_end(args);
+   return ptr;
+}
+
+/* Return the length of the string that would be generated by a printf-style
+ * format and argument list, not including the \0 byte.
+ */
+static size_t
+printf_length(const char *fmt, va_list untouched_args)
+{
+   int size;
+   char junk;
+
+   /* Make a copy of the va_list so the original caller can still use it */
+   va_list args;
+   va_copy(args, untouched_args);
+
+#ifdef _WIN32
+   /* We need to use _vscprintf to calculate the size as vsnprintf returns -1
+    * if the number of characters to write is greater than count.
+    */
+   size = _vscprintf(fmt, args);
+   (void)junk;
+#else
+   size = vsnprintf(&junk, 1, fmt, args);
+#endif
+   assert(size >= 0);
+
+   va_end(args);
+
+   return size;
+}
+
+char *
+ralloc_vasprintf(const void *ctx, const char *fmt, va_list args)
+{
+   size_t size = printf_length(fmt, args) + 1;
+
+   char *ptr = ralloc_size(ctx, size);
+   if (ptr != NULL)
+      vsnprintf(ptr, size, fmt, args);
+
+   return ptr;
+}
+
+bool
+ralloc_asprintf_append(char **str, const char *fmt, ...)
+{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_append(str, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_append(char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return ralloc_vasprintf_rewrite_tail(str, &existing_length, fmt, args); +} + +bool +ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + // Assuming a NULL context is probably bad, but it's expected behavior. + *str = ralloc_vasprintf(NULL, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = resize(*str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/*************************************************************************** + * Linear allocator for short-lived allocations. + *************************************************************************** + * + * The allocator consists of a parent node (2K buffer), which requires + * a ralloc parent, and child nodes (allocations). Child nodes can't be freed + * directly, because the parent doesn't track them. You have to release + * the parent node in order to release all its children. + * + * The allocator uses a fixed-sized buffer with a monotonically increasing + * offset after each allocation. If the buffer is all used, another buffer + * is allocated, sharing the same ralloc parent, so all buffers are at + * the same level in the ralloc hierarchy. + * + * The linear parent node is always the first buffer and keeps track of all + * other buffers. + */ + +#define MIN_LINEAR_BUFSIZE 2048 +#define SUBALLOC_ALIGNMENT 8 +#define LMAGIC 0x87b9c7d3 + +struct +#ifdef _MSC_VER + __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + linear_header { +#ifndef NDEBUG + unsigned magic; /* for debugging */ +#endif + unsigned offset; /* points to the first unused byte in the buffer */ + unsigned size; /* size of the buffer */ + void *ralloc_parent; /* new buffers will use this */ + struct linear_header *next; /* next buffer if we have more */ + struct linear_header *latest; /* the only buffer that has free space */ + + /* After this structure, the buffer begins. + * Each suballocation consists of linear_size_chunk as its header followed + * by the suballocation, so it goes: + * + * - linear_size_chunk + * - allocated space + * - linear_size_chunk + * - allocated space + * etc. + * + * linear_size_chunk is only needed by linear_realloc. + */ +}; + +struct linear_size_chunk { + unsigned size; /* for realloc */ + unsigned _padding; +}; + +typedef struct linear_header linear_header; +typedef struct linear_size_chunk linear_size_chunk; + +#define LINEAR_PARENT_TO_HEADER(parent) \ + (linear_header*) \ + ((char*)(parent) - sizeof(linear_size_chunk) - sizeof(linear_header)) + +/* Allocate the linear buffer with its header. 
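+ * The requested size is rounded up to at least MIN_LINEAR_BUFSIZE, so many
+ * small suballocations can share one underlying buffer.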
*/ +static linear_header * +create_linear_node(void *ralloc_ctx, unsigned min_size) +{ + linear_header *node; + + min_size += sizeof(linear_size_chunk); + + if (likely(min_size < MIN_LINEAR_BUFSIZE)) + min_size = MIN_LINEAR_BUFSIZE; + + node = ralloc_size(ralloc_ctx, sizeof(linear_header) + min_size); + if (unlikely(!node)) + return NULL; + +#ifndef NDEBUG + node->magic = LMAGIC; +#endif + node->offset = 0; + node->size = min_size; + node->ralloc_parent = ralloc_ctx; + node->next = NULL; + node->latest = node; + return node; +} + +void * +linear_alloc_child(void *parent, unsigned size) +{ + linear_header *first = LINEAR_PARENT_TO_HEADER(parent); + linear_header *latest = first->latest; + linear_header *new_node; + linear_size_chunk *ptr; + unsigned full_size; + + assert(first->magic == LMAGIC); + assert(!latest->next); + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + full_size = sizeof(linear_size_chunk) + size; + + if (unlikely(latest->offset + full_size > latest->size)) { + /* allocate a new node */ + new_node = create_linear_node(latest->ralloc_parent, size); + if (unlikely(!new_node)) + return NULL; + + first->latest = new_node; + latest->latest = new_node; + latest->next = new_node; + latest = new_node; + } + + ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset); + ptr->size = size; + latest->offset += full_size; + + assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0); + return &ptr[1]; +} + +void * +linear_alloc_parent(void *ralloc_ctx, unsigned size) +{ + linear_header *node; + + if (unlikely(!ralloc_ctx)) + return NULL; + + size = ALIGN_POT(size, SUBALLOC_ALIGNMENT); + + node = create_linear_node(ralloc_ctx, size); + if (unlikely(!node)) + return NULL; + + return linear_alloc_child((char*)node + + sizeof(linear_header) + + sizeof(linear_size_chunk), size); +} + +void * +linear_zalloc_child(void *parent, unsigned size) +{ + void *ptr = linear_alloc_child(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void * +linear_zalloc_parent(void *parent, unsigned size) +{ + void *ptr = linear_alloc_parent(parent, size); + + if (likely(ptr)) + memset(ptr, 0, size); + return ptr; +} + +void +linear_free_parent(void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + void *ptr = node; + + node = node->next; + ralloc_free(ptr); + } +} + +void +ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr) +{ + linear_header *node; + + if (unlikely(!ptr)) + return; + + node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + + while (node) { + ralloc_steal(new_ralloc_ctx, node); + node->ralloc_parent = new_ralloc_ctx; + node = node->next; + } +} + +void * +ralloc_parent_of_linear_parent(void *ptr) +{ + linear_header *node = LINEAR_PARENT_TO_HEADER(ptr); + assert(node->magic == LMAGIC); + return node->ralloc_parent; +} + +void * +linear_realloc(void *parent, void *old, unsigned new_size) +{ + unsigned old_size = 0; + ralloc_header *new_ptr; + + new_ptr = linear_alloc_child(parent, new_size); + + if (unlikely(!old)) + return new_ptr; + + old_size = ((linear_size_chunk*)old)[-1].size; + + if (likely(new_ptr && old_size)) + memcpy(new_ptr, old, MIN2(old_size, new_size)); + + return new_ptr; +} + +/* All code below is pretty much copied from ralloc and only the alloc + * calls are different. 
+ */ + +char * +linear_strdup(void *parent, const char *str) +{ + unsigned n; + char *ptr; + + if (unlikely(!str)) + return NULL; + + n = strlen(str); + ptr = linear_alloc_child(parent, n + 1); + if (unlikely(!ptr)) + return NULL; + + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +linear_asprintf(void *parent, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = linear_vasprintf(parent, fmt, args); + va_end(args); + return ptr; +} + +char * +linear_vasprintf(void *parent, const char *fmt, va_list args) +{ + unsigned size = printf_length(fmt, args) + 1; + + char *ptr = linear_alloc_child(parent, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +linear_asprintf_append(void *parent, char **str, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_append(parent, str, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_append(void *parent, char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? strlen(*str) : 0; + return linear_vasprintf_rewrite_tail(parent, str, &existing_length, fmt, args); +} + +bool +linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = linear_vasprintf_rewrite_tail(parent, str, start, fmt, args); + va_end(args); + return success; +} + +bool +linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + *str = linear_vasprintf(parent, fmt, args); + *start = strlen(*str); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = linear_realloc(parent, *str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +linear_cat(void *parent, char **dest, const char *str, unsigned n) +{ + char *both; + unsigned existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = linear_realloc(parent, *dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] = '\0'; + + *dest = both; + return true; +} + +bool +linear_strcat(void *parent, char **dest, const char *str) +{ + return linear_cat(parent, dest, str, strlen(str)); +} diff --git a/src/mesa/util/ralloc.h b/src/mesa/util/ralloc.h new file mode 100644 index 0000000..857ca5f --- /dev/null +++ b/src/mesa/util/ralloc.h @@ -0,0 +1,604 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ralloc.h
+ *
+ * ralloc: a recursive memory allocator
+ *
+ * The ralloc memory allocator creates a hierarchy of allocated
+ * objects. Every allocation is in reference to some parent, and
+ * every allocated object can in turn be used as the parent of a
+ * subsequent allocation. This allows for extremely convenient
+ * discarding of an entire tree/sub-tree of allocations by calling
+ * ralloc_free on any particular object to free it and all of its
+ * children.
+ *
+ * The conceptual working of ralloc was directly inspired by Andrew
+ * Tridgell's talloc, but ralloc is an independent implementation
+ * released under the MIT license and tuned for Mesa.
+ *
+ * talloc is more sophisticated than ralloc in that it includes reference
+ * counting and useful debugging features. However, it is released under
+ * a non-permissive open source license.
+ */
+
+#ifndef RALLOC_H
+#define RALLOC_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdarg.h>
+
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \def ralloc(ctx, type)
+ * Allocate a new object chained off of the given context.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define ralloc(ctx, type) ((type *) ralloc_size(ctx, sizeof(type)))
+
+/**
+ * \def rzalloc(ctx, type)
+ * Allocate a new object out of the given context and initialize it to zero.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define rzalloc(ctx, type) ((type *) rzalloc_size(ctx, sizeof(type)))
+
+/**
+ * Allocate a new ralloc context.
+ *
+ * While any ralloc'd pointer can be used as a context, sometimes it is useful
+ * to simply allocate a context with no associated memory.
+ *
+ * It is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, 0))
+ * \endcode
+ */
+void *ralloc_context(const void *ctx);
+
+/**
+ * Allocate memory chained off of the given context.
+ *
+ * This is the core allocation routine which is used by all others. It
+ * simply allocates storage for \p size bytes and returns the pointer,
+ * similar to \c malloc.
+ */
+void *ralloc_size(const void *ctx, size_t size) MALLOCLIKE;
+
+/**
+ * Allocate zero-initialized memory chained off of the given context.
+ *
+ * This is similar to \c calloc with a size of 1.
+ */
+void *rzalloc_size(const void *ctx, size_t size) MALLOCLIKE;
+
+/**
+ * Resize a piece of ralloc-managed memory, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * \param ctx  The context to use for new allocation. If \p ptr != NULL,
+ *             it must be the same as ralloc_parent(\p ptr).
+ * \param ptr  Pointer to the memory to be resized. May be NULL.
+ * \param size The amount of memory to allocate, in bytes.
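+ *
+ * A minimal usage sketch (hypothetical names):
+ * \code
+ * char *buf = ralloc_size(ctx, 64);
+ * buf = reralloc_size(ctx, buf, 128);   // contents of the first 64 bytes kept
+ * \endcode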
+ */
+void *reralloc_size(const void *ctx, void *ptr, size_t size);
+
+/**
+ * Resize a ralloc-managed array, preserving data and initializing any newly
+ * allocated data to zero.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * \param ctx      The context to use for new allocation. If \p ptr != NULL,
+ *                 it must be the same as ralloc_parent(\p ptr).
+ * \param ptr      Pointer to the memory to be resized. May be NULL.
+ * \param old_size The amount of memory in the previous allocation, in bytes.
+ * \param new_size The amount of memory to allocate, in bytes.
+ */
+void *rerzalloc_size(const void *ctx, void *ptr,
+                     size_t old_size, size_t new_size);
+
+/// \defgroup array Array Allocators @{
+
+/**
+ * \def ralloc_array(ctx, type, count)
+ * Allocate an array of objects chained off the given context.
+ *
+ * Similar to \c calloc, but does not initialize the memory to zero.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_array_size(ctx, sizeof(type), count))
+ * \endcode
+ */
+#define ralloc_array(ctx, type, count) \
+   ((type *) ralloc_array_size(ctx, sizeof(type), count))
+
+/**
+ * \def rzalloc_array(ctx, type, count)
+ * Allocate a zero-initialized array chained off the given context.
+ *
+ * Similar to \c calloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_array_size(ctx, sizeof(type), count))
+ * \endcode
+ */
+#define rzalloc_array(ctx, type, count) \
+   ((type *) rzalloc_array_size(ctx, sizeof(type), count))
+
+/**
+ * \def reralloc(ctx, ptr, type, count)
+ * Resize a ralloc-managed array, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx   The context to use for new allocation. If \p ptr != NULL,
+ *              it must be the same as ralloc_parent(\p ptr).
+ * \param ptr   Pointer to the array to be resized. May be NULL.
+ * \param type  The element type.
+ * \param count The number of elements to allocate.
+ */
+#define reralloc(ctx, ptr, type, count) \
+   ((type *) reralloc_array_size(ctx, ptr, sizeof(type), count))
+
+/**
+ * \def rerzalloc(ctx, ptr, type, count)
+ * Resize a ralloc-managed array, preserving data and initializing any newly
+ * allocated data to zero.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx       The context to use for new allocation. If \p ptr != NULL,
+ *                  it must be the same as ralloc_parent(\p ptr).
+ * \param ptr       Pointer to the array to be resized. May be NULL.
+ * \param type      The element type.
+ * \param old_count The number of elements in the previous allocation.
+ * \param new_count The number of elements to allocate.
+ */
+#define rerzalloc(ctx, ptr, type, old_count, new_count) \
+   ((type *) rerzalloc_array_size(ctx, ptr, sizeof(type), old_count, new_count))
+
+/**
+ * Allocate memory for an array chained off the given context.
+ *
+ * Similar to \c calloc, but does not initialize the memory to zero.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \p size and \p count. This is necessary for security.
+ */
+void *ralloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE;
+
+/**
+ * Allocate a zero-initialized array chained off the given context.
+ *
+ * Similar to \c calloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \p size and \p count. This is necessary for security.
+ */
+void *rzalloc_array_size(const void *ctx, size_t size, unsigned count) MALLOCLIKE;
+
+/**
+ * Resize a ralloc-managed array, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx   The context to use for new allocation. If \p ptr != NULL,
+ *              it must be the same as ralloc_parent(\p ptr).
+ * \param ptr   Pointer to the array to be resized. May be NULL.
+ * \param size  The size of an individual element.
+ * \param count The number of elements to allocate.
+ *
+ * \return The resized pointer, or NULL if allocation failed.
+ */
+void *reralloc_array_size(const void *ctx, void *ptr, size_t size,
+                          unsigned count);
+
+/**
+ * Resize a ralloc-managed array, preserving data and initializing any newly
+ * allocated data to zero.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx       The context to use for new allocation. If \p ptr != NULL,
+ *                  it must be the same as ralloc_parent(\p ptr).
+ * \param ptr       Pointer to the array to be resized. May be NULL.
+ * \param size      The size of an individual element.
+ * \param old_count The number of elements in the previous allocation.
+ * \param new_count The number of elements to allocate.
+ *
+ * \return The resized pointer, or NULL if allocation failed.
+ */
+void *rerzalloc_array_size(const void *ctx, void *ptr, size_t size,
+                           unsigned old_count, unsigned new_count);
+/// @}
+
+/**
+ * Free a piece of ralloc-managed memory.
+ *
+ * This will also free the memory of any children allocated from this context.
+ */
+void ralloc_free(void *ptr);
+
+/**
+ * "Steal" memory from one context, changing it to another.
+ *
+ * This changes \p ptr's context to \p new_ctx. This is quite useful if
+ * memory is allocated out of a temporary context.
+ */
+void ralloc_steal(const void *new_ctx, void *ptr);
+
+/**
+ * Reparent all children from one context to another.
+ *
+ * This effectively calls ralloc_steal(new_ctx, child) for all children of
+ * \p old_ctx.
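+ *
+ * A minimal usage sketch (hypothetical caller code; \c permanent_ctx is any
+ * ralloc context the caller already owns):
+ * \code
+ * void *tmp = ralloc_context(NULL);
+ * char *msg = ralloc_strdup(tmp, "built in a scratch context");
+ * ralloc_adopt(permanent_ctx, tmp);  // msg now lives under permanent_ctx
+ * ralloc_free(tmp);                  // frees only the now-empty scratch context
+ * \endcode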
+ */
+void ralloc_adopt(const void *new_ctx, void *old_ctx);
+
+/**
+ * Return the given pointer's ralloc context.
+ */
+void *ralloc_parent(const void *ptr);
+
+/**
+ * Set a callback to occur just before an object is freed.
+ */
+void ralloc_set_destructor(const void *ptr, void(*destructor)(void *));
+
+/// \defgroup string String Functions @{
+/**
+ * Duplicate a string, allocating the memory from the given context.
+ */
+char *ralloc_strdup(const void *ctx, const char *str) MALLOCLIKE;
+
+/**
+ * Duplicate a string, allocating the memory from the given context.
+ *
+ * Like \c strndup, at most \p n characters are copied. If \p str is longer
+ * than \p n characters, \p n are copied, and a terminating \c '\0' byte is
+ * added.
+ */
+char *ralloc_strndup(const void *ctx, const char *str, size_t n) MALLOCLIKE;
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends \p str to \p *dest, similar to \c strcat, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_strcat(char **dest, const char *str);
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends at most \p n bytes of \p str to \p *dest, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated; \p str does not need to be null
+ * terminated if it is longer than \p n.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_strncat(char **dest, const char *str, size_t n);
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends \p n bytes of \p str to \p *dest, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated.
+ *
+ * This function differs from ralloc_strcat() and ralloc_strncat() in that it
+ * does not do any strlen() calls which can become costly on large strings.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+ralloc_str_append(char **dest, const char *str,
+                  size_t existing_length, size_t str_size);
+
+/**
+ * Print to a string.
+ *
+ * This is analogous to \c sprintf, but allocates enough space (using \p ctx
+ * as the context) for the resulting string.
+ *
+ * \return The newly allocated string.
+ */
+char *ralloc_asprintf (const void *ctx, const char *fmt, ...) PRINTFLIKE(2, 3) MALLOCLIKE;
+
+/**
+ * Print to a string, given a va_list.
+ *
+ * This is analogous to \c vsprintf, but allocates enough space (using \p ctx
+ * as the context) for the resulting string.
+ *
+ * \return The newly allocated string.
+ */
+char *ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) MALLOCLIKE;
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator. Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_asprintf_append
+ *
+ * \param str   The string to be updated.
+ * \param start The index to start appending new data at.
+ * \param fmt   A printf-style formatting string
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_rewrite_tail(char **str, size_t *start,
+                                  const char *fmt, ...)
+                                  PRINTFLIKE(3, 4);
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator. Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_vasprintf_append
+ *
+ * \param str   The string to be updated.
+ * \param start The index to start appending new data at.
+ * \param fmt   A printf-style formatting string
+ * \param args  A va_list containing the data to be formatted
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
+                                   va_list args);
+
+/**
+ * Append formatted text to the supplied string.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_asprintf_rewrite_tail(str, strlen(*str), fmt, ...)
+ * \endcode
+ *
+ * \sa ralloc_asprintf
+ * \sa ralloc_asprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_append (char **str, const char *fmt, ...)
+                             PRINTFLIKE(2, 3);
+
+/**
+ * Append formatted text to the supplied string, given a va_list.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_vasprintf_rewrite_tail(str, strlen(*str), fmt, args)
+ * \endcode
+ *
+ * \sa ralloc_vasprintf
+ * \sa ralloc_vasprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_append(char **str, const char *fmt, va_list args);
+/// @}
+
+/**
+ * Declare C++ new and delete operators which use ralloc.
+ *
+ * Placing this macro in the body of a class makes it possible to do:
+ *
+ * TYPE *var = new(mem_ctx) TYPE(...);
+ * delete var;
+ *
+ * which is more idiomatic in C++ than calling ralloc.
+ */
+#define DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(TYPE, ALLOC_FUNC)           \
+private:                                                                 \
+   static void _ralloc_destructor(void *p)                               \
+   {                                                                     \
+      reinterpret_cast<TYPE *>(p)->TYPE::~TYPE();                        \
+   }                                                                     \
+public:                                                                  \
+   static void* operator new(size_t size, void *mem_ctx)                 \
+   {                                                                     \
+      void *p = ALLOC_FUNC(mem_ctx, size);                               \
+      assert(p != NULL);                                                 \
+      if (!HAS_TRIVIAL_DESTRUCTOR(TYPE))                                 \
+         ralloc_set_destructor(p, _ralloc_destructor);                   \
+      return p;                                                          \
+   }                                                                     \
+                                                                         \
+   static void operator delete(void *p)                                  \
+   {                                                                     \
+      /* The object's destructor is guaranteed to have already been     \
+       * called by the delete operator at this point -- Make sure it's  \
+       * not called again.
\ + */ \ + if (!HAS_TRIVIAL_DESTRUCTOR(TYPE)) \ + ralloc_set_destructor(p, NULL); \ + ralloc_free(p); \ + } + +#define DECLARE_RALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, ralloc_size) + +#define DECLARE_RZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, rzalloc_size) + +#define DECLARE_LINEAR_ALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_alloc_child) + +#define DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(type) \ + DECLARE_ALLOC_CXX_OPERATORS_TEMPLATE(type, linear_zalloc_child) + + +/** + * Do a fast allocation from the linear buffer, also known as the child node + * from the allocator's point of view. It can't be freed directly. You have + * to free the parent or the ralloc parent. + * + * \param parent parent node of the linear allocator + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_child(void *parent, unsigned size); + +/** + * Allocate a parent node that will hold linear buffers. The returned + * allocation is actually the first child node, but it's also the handle + * of the parent node. Use it for all child node allocations. + * + * \param ralloc_ctx ralloc context, must not be NULL + * \param size size to allocate (max 32 bits) + */ +void *linear_alloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Same as linear_alloc_child, but also clears memory. + */ +void *linear_zalloc_child(void *parent, unsigned size); + +/** + * Same as linear_alloc_parent, but also clears memory. + */ +void *linear_zalloc_parent(void *ralloc_ctx, unsigned size); + +/** + * Free the linear parent node. This will free all child nodes too. + * Freeing the ralloc parent will also free this. + */ +void linear_free_parent(void *ptr); + +/** + * Same as ralloc_steal, but steals the linear parent node. + */ +void ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr); + +/** + * Return the ralloc parent of the linear parent node. + */ +void *ralloc_parent_of_linear_parent(void *ptr); + +/** + * Same as realloc except that the linear allocator doesn't free child nodes, + * so it's reduced to memory duplication. It's used in places where + * reallocation is required. Don't use it often. It's much slower than + * realloc. + */ +void *linear_realloc(void *parent, void *old, unsigned new_size); + +/* The functions below have the same semantics as their ralloc counterparts, + * except that they always allocate a linear child node. + */ +char *linear_strdup(void *parent, const char *str); +char *linear_asprintf(void *parent, const char *fmt, ...); +char *linear_vasprintf(void *parent, const char *fmt, va_list args); +bool linear_asprintf_append(void *parent, char **str, const char *fmt, ...); +bool linear_vasprintf_append(void *parent, char **str, const char *fmt, + va_list args); +bool linear_asprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, ...); +bool linear_vasprintf_rewrite_tail(void *parent, char **str, size_t *start, + const char *fmt, va_list args); +bool linear_strcat(void *parent, char **dest, const char *str); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif diff --git a/src/mesa/util/xxhash.h b/src/mesa/util/xxhash.h new file mode 100644 index 0000000..eb9e865 --- /dev/null +++ b/src/mesa/util/xxhash.h @@ -0,0 +1,1446 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. 
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+Note : SMHasher's CRC32 implementation is not the fastest one.
+Other speed-oriented implementations can be faster,
+especially in combination with PCLMUL instruction :
+http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+/* Mesa leaves strict aliasing on in the compiler, and this code likes to
+ * dereference the passed in data as u32*, which means that the compiler is
+ * free to move the u32 read before the write of the struct members being
+ * hashed, and in practice it did in freedreno. Forcing these two things
+ * prevents it.
+ */ +#define XXH_FORCE_ALIGN_CHECK 0 +#define XXH_FORCE_MEMORY_ACCESS 0 + +#include "util/compiler.h" /* for FALLTHROUGH */ + +#if defined (__cplusplus) +extern "C" { +#endif + + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/* **************************** + * API modifier + ******************************/ +/** XXH_INLINE_ALL (and XXH_PRIVATE_API) + * This build macro includes xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining offers great performance improvement on small keys, + * and dramatic ones when length is expressed as a compile-time constant. + * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html . + * Methodology : + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate object. + */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif +#else +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/*! XXH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. 
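+ *
+ * For example (a hypothetical build, with the prefix name chosen freely):
+ *     cc -DXXH_NAMESPACE=MESA_ -c xxhash.c
+ * Callers keep writing XXH32(...) / XXH64(...); the macros below rename
+ * those symbols to MESA_XXH32 / MESA_XXH64 at compile time.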
+ */
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    7
+#define XXH_VERSION_RELEASE  2
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint32_t XXH32_hash_t;
+#else
+# include <limits.h>
+# if UINT_MAX == 0xFFFFFFFFUL
+    typedef unsigned int XXH32_hash_t;
+# else
+#   if ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
+#   else
+#     error "unsupported platform : need a 32-bit type"
+#   endif
+# endif
+#endif
+
+/*! XXH32() :
+    Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+/******* Streaming *******/
+
+/*
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated, using XXH*_createState().
+ *
+ * Start a new hash by initializing the state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate some new hash values later on, by invoking again
+ * XXH*_digest().
+ *
+ * When done, release the state, using XXH*_freeState().
+ */
+
+typedef struct XXH32_state_s XXH32_state_t;  /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+
+/******* Canonical representation *******/
+
+/* Default return values from XXH functions are basic unsigned 32 and 64 bits.
+ * This is the simplest and fastest format for further post-processing.
+ * However, this leaves open the question of what is the order of bytes,
+ * since little and big endian conventions will write the same number differently.
+ *
+ * The canonical representation settles this issue,
+ * by mandating big-endian convention,
+ * aka, the same convention as human-readable numbers (large digits first).
+ * When writing hash values to storage, sending them over a network, or printing them,
+ * it's highly recommended to use the canonical representation,
+ * to ensure portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values into and from canonical format.
+ */
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint64_t XXH64_hash_t;
+#else
+  /* the following type must have a width of 64-bit */
+  typedef unsigned long long XXH64_hash_t;
+#endif
+
+/*! XXH64() :
+ * Returns the 64-bit hash of sequence of length @length stored at memory address @input.
+ * @seed can be used to alter the result predictably.
+ * This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
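+ *
+ * A minimal call sketch (hypothetical buffer names):
+ *     XXH64_hash_t h = XXH64(buf, buf_len, 0);
+ * where 0 is the seed; any other seed alters the result predictably.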
+ */ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); + +/******* Streaming *******/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + + +#endif /* XXH_NO_LONG_LONG */ + +#endif /* XXHASH_H_5627135585666179 */ + + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* ************************************************************************************************ + This section contains declarations which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! +*************************************************************************************************** */ + +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. */ + +struct XXH32_state_s { + XXH32_hash_t total_len_32; + XXH32_hash_t large_len; + XXH32_hash_t v1; + XXH32_hash_t v2; + XXH32_hash_t v3; + XXH32_hash_t v4; + XXH32_hash_t mem32[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + +struct XXH64_state_s { + XXH64_hash_t total_len; + XXH64_hash_t v1; + XXH64_hash_t v2; + XXH64_hash_t v3; + XXH64_hash_t v4; + XXH64_hash_t mem64[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved32; /* required for padding anyway */ + XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +#endif /* XXH_NO_LONG_LONG */ + +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# define XXH_IMPLEMENTATION +#endif + +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ + + + +/*-********************************************************************** +* xxHash implementation +* Functions implementation used to be hosted within xxhash.c . +* However, code inlining requires to place implementation in the header file. +* As a consequence, xxhash.c used to be included within xxhash.h . +* But some build systems don't like *.c inclusions. +* So the implementation is now directly integrated within xxhash.h . +* Another small advantage is that xxhash.c is no longer required in /includes . 
+************************************************************************/
+
+#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
+   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
+# define XXH_IMPLEM_13a8737387
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ *            See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
+#   define XXH_FORCE_MEMORY_ACCESS 2
+# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+   (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
+#   define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks the input for a null pointer.
+ * If it is, the result for null input pointers is the same as a null-length input.
+ */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#   define XXH_FORCE_ALIGN_CHECK 0
+# else
+#   define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+/*!XXH_REROLL:
+ * Whether to reroll XXH32_finalize, and XXH64_finalize,
+ * instead of using an unrolled jump table/if statement loop.
+ *
+ * This is automatically defined on -Os/-Oz on GCC and Clang. */
+#ifndef XXH_REROLL
+# if defined(__OPTIMIZE_SIZE__)
+#   define XXH_REROLL 1
+# else
+#   define XXH_REROLL 0
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+*   for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#include <limits.h>   /* ULLONG_MAX */
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+# pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+# define XXH_FORCE_INLINE static __forceinline
+# define XXH_NO_INLINE static __declspec(noinline)
+#else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#   ifdef __GNUC__
+#     define XXH_FORCE_INLINE static inline __attribute__((always_inline))
+#     define XXH_NO_INLINE static __attribute__((noinline))
+#   else
+#     define XXH_FORCE_INLINE static inline
+#     define XXH_NO_INLINE static
+#   endif
+# else
+#   define XXH_FORCE_INLINE static
+#   define XXH_NO_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+
+/* *************************************
+* Debug
+***************************************/
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+# define DEBUGLEVEL 0
+#endif
+
+#if (DEBUGLEVEL>=1)
+# include <assert.h>   /* note : can still be disabled with NDEBUG */
+# define XXH_ASSERT(c)   assert(c)
+#else
+# define XXH_ASSERT(c)   ((void)0)
+#endif
+
+/* note : use after variable declarations */
+#define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint8_t xxh_u8;
+#else
+  typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+
+/* ***   Memory access   *** */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    xxh_u32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ***   Endianness   *** */
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+# if defined(_WIN32) /* Windows is always little endian */ \
+   || defined(__LITTLE_ENDIAN__) \
+   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#   define XXH_CPU_LITTLE_ENDIAN 1
+# elif defined(__BIG_ENDIAN__) \
+   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#   define XXH_CPU_LITTLE_ENDIAN 0
+# else
+static int XXH_isLittleEndian(void)
+{
+    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
+# endif
+#endif
+
+
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
+# define XXH_rotl32 __builtin_rotateleft32
+# define XXH_rotl64 __builtin_rotateleft64
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXH_swap32 (xxh_u32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+}
+
+static xxh_u32 XXH_readBE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+XXH_FORCE_INLINE xxh_u32
+XXH_readLE32_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned) {
+        return XXH_readLE32(ptr);
+    } else {
+        return XXH_CPU_LITTLE_ENDIAN ?
*(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
+    }
+}
+
+
+/* *************************************
+* Misc
+***************************************/
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+static const xxh_u32 PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
+static const xxh_u32 PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
+static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
+static const xxh_u32 PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
+static const xxh_u32 PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
+
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+    acc += input * PRIME32_2;
+    acc  = XXH_rotl32(acc, 13);
+    acc *= PRIME32_1;
+#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /* UGLY HACK:
+     * This inline assembly hack forces acc into a normal register. This is the
+     * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
+     * (pragmas and attributes don't work for some reason) without globally
+     * disabling SSE4.1.
+     *
+     * The reason we want to avoid vectorization is because despite working on
+     * 4 integers at a time, there are multiple factors slowing XXH32 down on
+     * SSE4:
+     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
+     *   making it slightly slower to multiply four integers at once compared to four
+     *   integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
+     *   still not worth it to go into SSE just to multiply unless doing a long operation.
+     *
+     * - Four instructions are required to rotate,
+     *      movdqa tmp, v   // not required with VEX encoding
+     *      pslld  tmp, 13  // tmp <<= 13
+     *      psrld  v, 19    // x >>= 19
+     *      por    v, tmp   // x |= tmp
+     *   compared to one for scalar:
+     *      roll   v, 13    // reliably fast across the board
+     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
+     *
+     * - Instruction level parallelism is actually more beneficial here because the
+     *   SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
+     *   while v3 can multiply. SSE forces them to operate together.
+     *
+     * How this hack works:
+     * __asm__(""       // Declare an assembly block but don't declare any instructions
+     *    :             // However, as an Input/Output Operand,
+     *    "+r"          // constrain a read/write operand (+) as a general purpose register (r).
+     *    (acc)         // and set acc as the operand
+     * );
+     *
+     * Because of the 'r', the compiler has promised that seed will be in a
+     * general purpose register and the '+' says that it will be 'read/write',
+     * so it has to assume it has changed. It is like volatile without all the
+     * loads and stores.
+     *
+     * Since the argument has to be in a normal register (not an SSE register),
+     * each time XXH32_round is called, it is impossible to vectorize.
*/ + __asm__("" : "+r" (acc)); +#endif + return acc; +} + +/* mix all bits */ +static xxh_u32 XXH32_avalanche(xxh_u32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, align) + +static xxh_u32 +XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define PROCESS1 \ + h32 += (*ptr++) * PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(ptr) * PRIME32_3; \ + ptr+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + /* Compact rerolled version */ + if (XXH_REROLL) { + len &= 15; + while (len >= 4) { + PROCESS4; + len -= 4; + } + while (len > 0) { + PROCESS1; + --len; + } + return XXH32_avalanche(h32); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: PROCESS4; + FALLTHROUGH; + case 8: PROCESS4; + FALLTHROUGH; + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: PROCESS4; + FALLTHROUGH; + case 9: PROCESS4; + FALLTHROUGH; + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + FALLTHROUGH; + case 10: PROCESS4; + FALLTHROUGH; + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + FALLTHROUGH; + case 11: PROCESS4; + FALLTHROUGH; + case 7: PROCESS4; + FALLTHROUGH; + case 3: PROCESS1; + FALLTHROUGH; + case 2: PROCESS1; + FALLTHROUGH; + case 1: PROCESS1; + FALLTHROUGH; + case 0: return XXH32_avalanche(h32); + } + XXH_ASSERT(0); + return h32; /* reaching this point is deemed impossible */ + } +} + +XXH_FORCE_INLINE xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)16; + } +#endif + + if (len>=16) { + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2; + xxh_u32 v2 = seed + PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + 
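+/* A minimal streaming-usage sketch (hypothetical caller code and chunk names):
+ *
+ *    XXH32_state_t* st = XXH32_createState();
+ *    XXH32_reset(st, 0);
+ *    XXH32_update(st, chunk1, len1);
+ *    XXH32_update(st, chunk2, len2);
+ *    XXH32_hash_t h = XXH32_digest(st);
+ *    XXH32_freeState(st);
+ *
+ * This produces the same hash as a single XXH32() call over the
+ * concatenated chunks.
+ */
+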
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const xxh_u8* const limit = bEnd - 16; + xxh_u32 v1 = state->v1; + xxh_u32 v2 = state->v2; + xxh_u32 v3 = state->v3; + xxh_u32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} + + +/******* Canonical representation *******/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems. 
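+*
+* A minimal sketch (hypothetical names):
+*    XXH32_canonical_t c;
+*    XXH32_canonicalFromHash(&c, h32);                  // c.digest[] is big-endian
+*    XXH32_hash_t back = XXH32_hashFromCanonical(&c);   // back == h32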
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + + +/*! XXH_REROLL_XXH64: + * Whether to reroll the XXH64_finalize() loop. + * + * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain + * on 64-bit hosts, as only one jump is required. + * + * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers, + * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes + * ridiculously large (the largest function in the binary on i386!), and rerolling it saves + * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better + * and is more likely to be inlined by the compiler. + * + * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */ +#ifndef XXH_REROLL_XXH64 +# if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \ + || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \ + || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \ + || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \ + || defined(__mips64__) || defined(__mips64)) /* mips64 */ \ + || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */ +# define XXH_REROLL_XXH64 1 +# else +# define XXH_REROLL_XXH64 0 +# endif +#endif /* !defined(XXH_REROLL_XXH64) */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    xxh_u64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+static xxh_u64 XXH_swap64 (xxh_u64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x <<  8) & 0x000000ff00000000ULL) |
+            ((x >>  8) & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+}
+
+static xxh_u64 XXH_readBE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return XXH_readLE64(ptr);
+    else
+        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
+}
+
+
+/******* xxh64 *******/
+
+static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
+static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
+static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
+static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
+static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    acc += input * PRIME64_2;
+    acc  = XXH_rotl64(acc, 31);
+    acc *= PRIME64_1;
+    return acc;
+}
+
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    val  = XXH64_round(0, val);
+    acc ^= val;
+    acc  = acc * PRIME64_1 + PRIME64_4;
+    return acc;
+}
+
+static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+static xxh_u64
+XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+#define PROCESS1_64              \
+    h64 ^= (*ptr++) * PRIME64_5; \
+    h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64                                        \
+    h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1;      \
+    ptr+=4;                                                \
+    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 {                                      \
+    xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
+    ptr+=8;                                                \
+    h64 ^= k1;                                             \
+    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;     \
+}
+
+    /* Rerolled version for 32-bit targets is faster and much smaller.
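+     * Editor's note: len is still the total input length at this point; every
+     * complete 32-byte stripe was already consumed by the caller, so len & 31
+     * (used by both branches below) is exactly the number of tail bytes at ptr.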
*/ + if (XXH_REROLL || XXH_REROLL_XXH64) { + len &= 31; + while (len >= 8) { + PROCESS8_64; + len -= 8; + } + if (len >= 4) { + PROCESS4_64; + len -= 4; + } + while (len > 0) { + PROCESS1_64; + --len; + } + return XXH64_avalanche(h64); + } else { + switch(len & 31) { + case 24: PROCESS8_64; + FALLTHROUGH; + case 16: PROCESS8_64; + FALLTHROUGH; + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + FALLTHROUGH; + case 20: PROCESS8_64; + FALLTHROUGH; + case 12: PROCESS8_64; + FALLTHROUGH; + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + FALLTHROUGH; + case 17: PROCESS8_64; + FALLTHROUGH; + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + FALLTHROUGH; + case 21: PROCESS8_64; + FALLTHROUGH; + case 13: PROCESS8_64; + FALLTHROUGH; + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + FALLTHROUGH; + case 18: PROCESS8_64; + FALLTHROUGH; + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + FALLTHROUGH; + case 22: PROCESS8_64; + FALLTHROUGH; + case 14: PROCESS8_64; + FALLTHROUGH; + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + FALLTHROUGH; + case 19: PROCESS8_64; + FALLTHROUGH; + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + FALLTHROUGH; + case 23: PROCESS8_64; + FALLTHROUGH; + case 15: PROCESS8_64; + FALLTHROUGH; + case 7: PROCESS4_64; + FALLTHROUGH; + case 3: PROCESS1_64; + FALLTHROUGH; + case 2: PROCESS1_64; + FALLTHROUGH; + case 1: PROCESS1_64; + FALLTHROUGH; + case 0: return XXH64_avalanche(h64); + } + } + /* impossible to reach */ + XXH_ASSERT(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +XXH_FORCE_INLINE xxh_u64 +XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input + len; + xxh_u64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)32; + } +#endif + + if (len>=32) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2; + xxh_u64 v2 = seed + PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; + v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; + v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; + v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; + } while (input<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (xxh_u64) len; + + return XXH64_finalize(h64, input, len, align); +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8*)input, len); + return XXH64_digest(&state); + +#else + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8*)input, len, 
seed, XXH_aligned); + } } + + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + +#endif +} + +/******* Hash Streaming *******/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + /* do not write into reserved64, might be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64)); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH64_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = state->v1; + xxh_u64 v2 = state->v2; + xxh_u64 v3 = state->v3; + xxh_u64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + xxh_u64 const v1 = state->v1; + xxh_u64 const v2 = state->v2; + xxh_u64 const v3 = state->v3; + xxh_u64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} + + +/******* Canonical representation *******/ + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == 
sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + + + +/* ********************************************************************* +* XXH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + +/* #include "xxh3.h" */ + + +#endif /* XXH_NO_LONG_LONG */ + + +#endif /* XXH_IMPLEMENTATION */ + + +#if defined (__cplusplus) +} +#endif
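
Editor's note: for quick reference, a minimal sketch of how the streaming API
vendored above is meant to be used (illustrative only, not part of the patch;
read_chunk() is a hypothetical helper returning the number of bytes produced
and 0 at end of stream, and the include path assumes this file lands at
src/mesa/util/xxhash.h):

    #include "util/xxhash.h"

    static XXH64_hash_t hash_stream(XXH64_hash_t seed)
    {
        char buf[4096];
        size_t n;
        XXH64_hash_t h;
        XXH64_state_t* const st = XXH64_createState();  /* NULL on allocation failure */
        XXH64_reset(st, seed);
        while ((n = read_chunk(buf, sizeof(buf))) != 0)
            XXH64_update(st, buf, n);                   /* buffers tails < 32 bytes */
        h = XXH64_digest(st);                           /* can be called repeatedly */
        XXH64_freeState(st);
        return h;
    }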