You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
288 lines
9.5 KiB
288 lines
9.5 KiB
5 years ago
|
package brotli
|
||
|
|
||
|
import "encoding/binary"
|
||
|
|
||
|
/* Copyright 2016 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
|
||
|
|
||
|
func (*h10) HashTypeLength() uint {
|
||
|
return 4
|
||
|
}
|
||
|
|
||
|
func (*h10) StoreLookahead() uint {
|
||
|
return 128
|
||
|
}
|
||
|
|
||
|
func hashBytesH10(data []byte) uint32 {
|
||
|
var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
|
||
|
|
||
|
/* The higher bits contain more mixture from the multiplication,
|
||
|
so we take our results from there. */
|
||
|
return h >> (32 - 17)
|
||
|
}
|
||
|
|
||
|
/* A (forgetful) hash table where each hash bucket contains a binary tree of
|
||
|
sequences whose first 4 bytes share the same hash code.
|
||
|
Each sequence is 128 long and is identified by its starting
|
||
|
position in the input data. The binary tree is sorted by the lexicographic
|
||
|
order of the sequences, and it is also a max-heap with respect to the
|
||
|
starting positions. */
|
||
|
type h10 struct {
|
||
|
hasherCommon
|
||
|
window_mask_ uint
|
||
|
buckets_ [1 << 17]uint32
|
||
|
invalid_pos_ uint32
|
||
|
forest []uint32
|
||
|
}
|
||
|
|
||
|
func (h *h10) Initialize(params *encoderParams) {
|
||
|
h.window_mask_ = (1 << params.lgwin) - 1
|
||
|
h.invalid_pos_ = uint32(0 - h.window_mask_)
|
||
|
var num_nodes uint = uint(1) << params.lgwin
|
||
|
h.forest = make([]uint32, 2*num_nodes)
|
||
|
}
|
||
|
|
||
|
func (h *h10) Prepare(one_shot bool, input_size uint, data []byte) {
|
||
|
var invalid_pos uint32 = h.invalid_pos_
|
||
|
var i uint32
|
||
|
for i = 0; i < 1<<17; i++ {
|
||
|
h.buckets_[i] = invalid_pos
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func leftChildIndexH10(self *h10, pos uint) uint {
|
||
|
return 2 * (pos & self.window_mask_)
|
||
|
}
|
||
|
|
||
|
func rightChildIndexH10(self *h10, pos uint) uint {
|
||
|
return 2*(pos&self.window_mask_) + 1
|
||
|
}
|
||
|
|
||
|
/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
|
||
|
hash bucket's binary tree is searched for matches and is re-rooted at the
|
||
|
current position.
|
||
|
|
||
|
If less than 128 data is available, the hash bucket of the
|
||
|
current position is searched for matches, but the state of the hash table
|
||
|
is not changed, since we can not know the final sorting order of the
|
||
|
current (incomplete) sequence.
|
||
|
|
||
|
This function must be called with increasing cur_ix positions. */
|
||
|
func storeAndFindMatchesH10(self *h10, data []byte, cur_ix uint, ring_buffer_mask uint, max_length uint, max_backward uint, best_len *uint, matches []backwardMatch) []backwardMatch {
|
||
|
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
||
|
var max_comp_len uint = brotli_min_size_t(max_length, 128)
|
||
|
var should_reroot_tree bool = (max_length >= 128)
|
||
|
var key uint32 = hashBytesH10(data[cur_ix_masked:])
|
||
|
var forest []uint32 = self.forest
|
||
|
var prev_ix uint = uint(self.buckets_[key])
|
||
|
var node_left uint = leftChildIndexH10(self, cur_ix)
|
||
|
var node_right uint = rightChildIndexH10(self, cur_ix)
|
||
|
var best_len_left uint = 0
|
||
|
var best_len_right uint = 0
|
||
|
var depth_remaining uint
|
||
|
/* The forest index of the rightmost node of the left subtree of the new
|
||
|
root, updated as we traverse and re-root the tree of the hash bucket. */
|
||
|
|
||
|
/* The forest index of the leftmost node of the right subtree of the new
|
||
|
root, updated as we traverse and re-root the tree of the hash bucket. */
|
||
|
|
||
|
/* The match length of the rightmost node of the left subtree of the new
|
||
|
root, updated as we traverse and re-root the tree of the hash bucket. */
|
||
|
|
||
|
/* The match length of the leftmost node of the right subtree of the new
|
||
|
root, updated as we traverse and re-root the tree of the hash bucket. */
|
||
|
if should_reroot_tree {
|
||
|
self.buckets_[key] = uint32(cur_ix)
|
||
|
}
|
||
|
|
||
|
for depth_remaining = 64; ; depth_remaining-- {
|
||
|
var backward uint = cur_ix - prev_ix
|
||
|
var prev_ix_masked uint = prev_ix & ring_buffer_mask
|
||
|
if backward == 0 || backward > max_backward || depth_remaining == 0 {
|
||
|
if should_reroot_tree {
|
||
|
forest[node_left] = self.invalid_pos_
|
||
|
forest[node_right] = self.invalid_pos_
|
||
|
}
|
||
|
|
||
|
break
|
||
|
}
|
||
|
{
|
||
|
var cur_len uint = brotli_min_size_t(best_len_left, best_len_right)
|
||
|
var len uint
|
||
|
assert(cur_len <= 128)
|
||
|
len = cur_len + findMatchLengthWithLimit(data[cur_ix_masked+cur_len:], data[prev_ix_masked+cur_len:], max_length-cur_len)
|
||
|
if matches != nil && len > *best_len {
|
||
|
*best_len = uint(len)
|
||
|
initBackwardMatch(&matches[0], backward, uint(len))
|
||
|
matches = matches[1:]
|
||
|
}
|
||
|
|
||
|
if len >= max_comp_len {
|
||
|
if should_reroot_tree {
|
||
|
forest[node_left] = forest[leftChildIndexH10(self, prev_ix)]
|
||
|
forest[node_right] = forest[rightChildIndexH10(self, prev_ix)]
|
||
|
}
|
||
|
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if data[cur_ix_masked+len] > data[prev_ix_masked+len] {
|
||
|
best_len_left = uint(len)
|
||
|
if should_reroot_tree {
|
||
|
forest[node_left] = uint32(prev_ix)
|
||
|
}
|
||
|
|
||
|
node_left = rightChildIndexH10(self, prev_ix)
|
||
|
prev_ix = uint(forest[node_left])
|
||
|
} else {
|
||
|
best_len_right = uint(len)
|
||
|
if should_reroot_tree {
|
||
|
forest[node_right] = uint32(prev_ix)
|
||
|
}
|
||
|
|
||
|
node_right = leftChildIndexH10(self, prev_ix)
|
||
|
prev_ix = uint(forest[node_right])
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return matches
|
||
|
}
|
||
|
|
||
|
/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
|
||
|
length of max_length and stores the position cur_ix in the hash table.
|
||
|
|
||
|
Sets *num_matches to the number of matches found, and stores the found
|
||
|
matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
||
|
sorted by strictly increasing length and (non-strictly) increasing
|
||
|
distance. */
|
||
|
func findAllMatchesH10(handle *h10, dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, cur_ix uint, max_length uint, max_backward uint, gap uint, params *encoderParams, matches []backwardMatch) uint {
|
||
|
var orig_matches []backwardMatch = matches
|
||
|
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
||
|
var best_len uint = 1
|
||
|
var short_match_max_backward uint
|
||
|
if params.quality != hqZopflificationQuality {
|
||
|
short_match_max_backward = 16
|
||
|
} else {
|
||
|
short_match_max_backward = 64
|
||
|
}
|
||
|
var stop uint = cur_ix - short_match_max_backward
|
||
|
var dict_matches [maxStaticDictionaryMatchLen + 1]uint32
|
||
|
var i uint
|
||
|
if cur_ix < short_match_max_backward {
|
||
|
stop = 0
|
||
|
}
|
||
|
for i = cur_ix - 1; i > stop && best_len <= 2; i-- {
|
||
|
var prev_ix uint = i
|
||
|
var backward uint = cur_ix - prev_ix
|
||
|
if backward > max_backward {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
prev_ix &= ring_buffer_mask
|
||
|
if data[cur_ix_masked] != data[prev_ix] || data[cur_ix_masked+1] != data[prev_ix+1] {
|
||
|
continue
|
||
|
}
|
||
|
{
|
||
|
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||
|
if len > best_len {
|
||
|
best_len = uint(len)
|
||
|
initBackwardMatch(&matches[0], backward, uint(len))
|
||
|
matches = matches[1:]
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if best_len < max_length {
|
||
|
matches = storeAndFindMatchesH10(handle, data, cur_ix, ring_buffer_mask, max_length, max_backward, &best_len, matches)
|
||
|
}
|
||
|
|
||
|
for i = 0; i <= maxStaticDictionaryMatchLen; i++ {
|
||
|
dict_matches[i] = kInvalidMatch
|
||
|
}
|
||
|
{
|
||
|
var minlen uint = brotli_max_size_t(4, best_len+1)
|
||
|
if findAllStaticDictionaryMatches(dictionary, data[cur_ix_masked:], minlen, max_length, dict_matches[0:]) {
|
||
|
var maxlen uint = brotli_min_size_t(maxStaticDictionaryMatchLen, max_length)
|
||
|
var l uint
|
||
|
for l = minlen; l <= maxlen; l++ {
|
||
|
var dict_id uint32 = dict_matches[l]
|
||
|
if dict_id < kInvalidMatch {
|
||
|
var distance uint = max_backward + gap + uint(dict_id>>5) + 1
|
||
|
if distance <= params.dist.max_distance {
|
||
|
initDictionaryBackwardMatch(&matches[0], distance, l, uint(dict_id&31))
|
||
|
matches = matches[1:]
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return uint(-cap(matches) + cap(orig_matches))
|
||
|
}
|
||
|
|
||
|
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
||
|
current sequence, without returning any matches.
|
||
|
REQUIRES: ix + 128 <= end-of-current-block */
|
||
|
func (h *h10) Store(data []byte, mask uint, ix uint) {
|
||
|
var max_backward uint = h.window_mask_ - windowGap + 1
|
||
|
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
|
||
|
storeAndFindMatchesH10(h, data, ix, mask, 128, max_backward, nil, nil)
|
||
|
}
|
||
|
|
||
|
func (h *h10) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
||
|
var i uint = ix_start
|
||
|
var j uint = ix_start
|
||
|
if ix_start+63 <= ix_end {
|
||
|
i = ix_end - 63
|
||
|
}
|
||
|
|
||
|
if ix_start+512 <= i {
|
||
|
for ; j < i; j += 8 {
|
||
|
h.Store(data, mask, j)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for ; i < ix_end; i++ {
|
||
|
h.Store(data, mask, i)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (h *h10) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
||
|
if num_bytes >= h.HashTypeLength()-1 && position >= 128 {
|
||
|
var i_start uint = position - 128 + 1
|
||
|
var i_end uint = brotli_min_size_t(position, i_start+num_bytes)
|
||
|
/* Store the last `128 - 1` positions in the hasher.
|
||
|
These could not be calculated before, since they require knowledge
|
||
|
of both the previous and the current block. */
|
||
|
|
||
|
var i uint
|
||
|
for i = i_start; i < i_end; i++ {
|
||
|
/* Maximum distance is window size - 16, see section 9.1. of the spec.
|
||
|
Furthermore, we have to make sure that we don't look further back
|
||
|
from the start of the next block than the window size, otherwise we
|
||
|
could access already overwritten areas of the ring-buffer. */
|
||
|
var max_backward uint = h.window_mask_ - brotli_max_size_t(windowGap-1, position-i)
|
||
|
|
||
|
/* We know that i + 128 <= position + num_bytes, i.e. the
|
||
|
end of the current block and that we have at least
|
||
|
128 tail in the ring-buffer. */
|
||
|
storeAndFindMatchesH10(h, ringbuffer, i, ringbuffer_mask, 128, max_backward, nil, nil)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
|
||
|
/* maxNumMatchesH10 is 128, written as 64 plus the tree search depth of 64
   used by storeAndFindMatchesH10. Presumably it is the capacity callers
   reserve for the matches slice of findAllMatchesH10 (confirm at call sites). */
const maxNumMatchesH10 = 64 + 64
|
||
|
|
||
|
func (*h10) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
|
||
|
panic("unimplemented")
|
||
|
}
|
||
|
|
||
|
func (*h10) PrepareDistanceCache(distance_cache []int) {
|
||
|
panic("unimplemented")
|
||
|
}
|