You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
144 lines
4.6 KiB
144 lines
4.6 KiB
// Copyright 2018 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.

// Package fse provides Finite State Entropy encoding and decoding.
//
// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
// for byte blocks as implemented in zstd.
//
// See https://github.com/klauspost/compress/tree/master/fse for more information.
|
|
package fse
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"math/bits"
|
|
)
|
|
|
|
const (
	// maxMemoryUsage and defaultMemoryUsage are expressed as a base-2
	// logarithm: a value of N means 2^N bytes
	// (examples: 10 -> 1KB; 12 -> 4KB; 16 -> 64KB; 20 -> 1MB; etc.).
	//
	// Increasing memory usage improves compression ratio.
	// Reduced memory usage can improve speed, due to cache effect.
	// Recommended max value is 14, for 16KB, which nicely fits into
	// Intel x86 L1 cache.
	maxMemoryUsage     = 14
	defaultMemoryUsage = 13

	// maxTableLog is the largest table log accepted for a block and
	// maxTablesize the matching number of table entries.
	// defaultTablelog is used when no table log has been requested.
	// maxSymbolValue is the highest symbol value; the histogram and the
	// normalized counts hold maxSymbolValue+1 entries.
	maxTableLog     = maxMemoryUsage - 2
	maxTablesize    = 1 << maxTableLog
	defaultTablelog = defaultMemoryUsage - 2
	minTablelog     = 5
	maxSymbolValue  = 255
)
|
|
|
|
// Sentinel errors returned by the package. Compare against them with
// errors.Is (or ==) rather than by message.
var (
	// ErrIncompressible is returned when input is judged to be too hard to compress.
	ErrIncompressible = errors.New("input is not compressible")

	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
	ErrUseRLE = errors.New("input is single value repeated")
)
|
|
|
|
// Scratch provides temporary storage for compression and decompression.
|
|
type Scratch struct {
|
|
// Private
|
|
count [maxSymbolValue + 1]uint32
|
|
norm [maxSymbolValue + 1]int16
|
|
br byteReader
|
|
bits bitReader
|
|
bw bitWriter
|
|
ct cTable // Compression tables.
|
|
decTable []decSymbol // Decompression table.
|
|
maxCount int // count of the most probable symbol
|
|
|
|
// Per block parameters.
|
|
// These can be used to override compression parameters of the block.
|
|
// Do not touch, unless you know what you are doing.
|
|
|
|
// Out is output buffer.
|
|
// If the scratch is re-used before the caller is done processing the output,
|
|
// set this field to nil.
|
|
// Otherwise the output buffer will be re-used for next Compression/Decompression step
|
|
// and allocation will be avoided.
|
|
Out []byte
|
|
|
|
// DecompressLimit limits the maximum decoded size acceptable.
|
|
// If > 0 decompression will stop when approximately this many bytes
|
|
// has been decoded.
|
|
// If 0, maximum size will be 2GB.
|
|
DecompressLimit int
|
|
|
|
symbolLen uint16 // Length of active part of the symbol table.
|
|
actualTableLog uint8 // Selected tablelog.
|
|
zeroBits bool // no bits has prob > 50%.
|
|
clearCount bool // clear count
|
|
|
|
// MaxSymbolValue will override the maximum symbol value of the next block.
|
|
MaxSymbolValue uint8
|
|
|
|
// TableLog will attempt to override the tablelog for the next block.
|
|
TableLog uint8
|
|
}
|
|
|
|
// Histogram allows to populate the histogram and skip that step in the compression,
|
|
// It otherwise allows to inspect the histogram when compression is done.
|
|
// To indicate that you have populated the histogram call HistogramFinished
|
|
// with the value of the highest populated symbol, as well as the number of entries
|
|
// in the most populated entry. These are accepted at face value.
|
|
// The returned slice will always be length 256.
|
|
func (s *Scratch) Histogram() []uint32 {
|
|
return s.count[:]
|
|
}
|
|
|
|
// HistogramFinished can be called to indicate that the histogram has been populated.
|
|
// maxSymbol is the index of the highest set symbol of the next data segment.
|
|
// maxCount is the number of entries in the most populated entry.
|
|
// These are accepted at face value.
|
|
func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
|
|
s.maxCount = maxCount
|
|
s.symbolLen = uint16(maxSymbol) + 1
|
|
s.clearCount = maxCount != 0
|
|
}
|
|
|
|
// prepare will prepare and allocate scratch tables used for both compression and decompression.
|
|
func (s *Scratch) prepare(in []byte) (*Scratch, error) {
|
|
if s == nil {
|
|
s = &Scratch{}
|
|
}
|
|
if s.MaxSymbolValue == 0 {
|
|
s.MaxSymbolValue = 255
|
|
}
|
|
if s.TableLog == 0 {
|
|
s.TableLog = defaultTablelog
|
|
}
|
|
if s.TableLog > maxTableLog {
|
|
return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
|
|
}
|
|
if cap(s.Out) == 0 {
|
|
s.Out = make([]byte, 0, len(in))
|
|
}
|
|
if s.clearCount && s.maxCount == 0 {
|
|
for i := range s.count {
|
|
s.count[i] = 0
|
|
}
|
|
s.clearCount = false
|
|
}
|
|
s.br.init(in)
|
|
if s.DecompressLimit == 0 {
|
|
// Max size 2GB.
|
|
s.DecompressLimit = (2 << 30) - 1
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
// tableStep returns the table index step for a table with tableSize entries,
// computed as tableSize/2 + tableSize/8 + 3.
func tableStep(tableSize uint32) uint32 {
	return (tableSize >> 1) + (tableSize >> 3) + 3
}
|
|
|
|
// highBits returns the zero-based index of the most significant set bit of
// val, i.e. floor(log2(val)).
//
// val must be non-zero: for val == 0 the subtraction underflows and the
// result wraps around to the maximum uint32 value.
func highBits(val uint32) uint32 {
	return uint32(bits.Len32(val) - 1)
}
|
|
|