You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
257 lines
7.1 KiB
257 lines
7.1 KiB
4 years ago
|
/*
 * Package xz Go Reader API
 *
 * Author: Michael Cross <https://github.com/xi2>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
|
||
|
|
||
|
package xz
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"io"
|
||
|
)
|
||
|
|
||
|
// Errors returned by this package. Each one corresponds to a failure
// status reported by the decoder.
var (
	// ErrUnsupportedCheck reports an integrity check type that is not supported.
	ErrUnsupportedCheck = errors.New("xz: integrity check type not supported")

	// ErrMemlimit reports an LZMA2 dictionary size above the allowed maximum.
	ErrMemlimit = errors.New("xz: LZMA2 dictionary size exceeds max")

	// ErrFormat reports input that is not recognized as an XZ file.
	ErrFormat = errors.New("xz: file format not recognized")

	// ErrOptions reports compression options that are not supported.
	ErrOptions = errors.New("xz: compression options not supported")

	// ErrData reports corrupt input data.
	ErrData = errors.New("xz: data is corrupt")

	// ErrBuf reports input data that is truncated or corrupt.
	ErrBuf = errors.New("xz: data is truncated or corrupt")
)
|
||
|
|
||
|
// DefaultDictMax is the maximum dictionary size, in bytes, that the
// decoder uses by default. It is large enough to decompress files
// created with XZ Utils "xz -9".
const DefaultDictMax = 64 << 20 // 64 MiB
|
||
|
|
||
|
// inBufSize is the size, in bytes, of the decoder's input buffer.
const inBufSize = 8 << 10 // 8 KiB
|
||
|
|
||
|
// A Reader is an io.Reader that can be used to retrieve uncompressed
|
||
|
// data from an XZ file.
|
||
|
//
|
||
|
// In general, an XZ file can be a concatenation of other XZ
|
||
|
// files. Reads from the Reader return the concatenation of the
|
||
|
// uncompressed data of each.
|
||
|
type Reader struct {
|
||
|
Header
|
||
|
r io.Reader // the wrapped io.Reader
|
||
|
multistream bool // true if reader is in multistream mode
|
||
|
rEOF bool // true after io.EOF received on r
|
||
|
dEOF bool // true after decoder has completed
|
||
|
padding int // bytes of stream padding read (or -1)
|
||
|
in [inBufSize]byte // backing array for buf.in
|
||
|
buf *xzBuf // decoder input/output buffers
|
||
|
dec *xzDec // decoder state
|
||
|
err error // the result of the last decoder call
|
||
|
}
|
||
|
|
||
|
// NewReader creates a new Reader reading from r. The decompressor
|
||
|
// will use an LZMA2 dictionary size up to dictMax bytes in
|
||
|
// size. Passing a value of zero sets dictMax to DefaultDictMax. If
|
||
|
// an individual XZ stream requires a dictionary size greater than
|
||
|
// dictMax in order to decompress, Read will return ErrMemlimit.
|
||
|
//
|
||
|
// If NewReader is passed a value of nil for r then a Reader is
|
||
|
// created such that all read attempts will return io.EOF. This is
|
||
|
// useful if you just want to allocate memory for a Reader which will
|
||
|
// later be initialized with Reset.
|
||
|
//
|
||
|
// Due to internal buffering, the Reader may read more data than
|
||
|
// necessary from r.
|
||
|
func NewReader(r io.Reader, dictMax uint32) (*Reader, error) {
|
||
|
if dictMax == 0 {
|
||
|
dictMax = DefaultDictMax
|
||
|
}
|
||
|
z := &Reader{
|
||
|
r: r,
|
||
|
multistream: true,
|
||
|
padding: -1,
|
||
|
buf: &xzBuf{},
|
||
|
}
|
||
|
if r == nil {
|
||
|
z.rEOF, z.dEOF = true, true
|
||
|
}
|
||
|
z.dec = xzDecInit(dictMax, &z.Header)
|
||
|
var err error
|
||
|
if r != nil {
|
||
|
_, err = z.Read(nil) // read stream header
|
||
|
}
|
||
|
return z, err
|
||
|
}
|
||
|
|
||
|
// decode is a wrapper around xzDecRun that additionally handles
|
||
|
// stream padding. It treats the padding as a kind of stream that
|
||
|
// decodes to nothing.
|
||
|
//
|
||
|
// When decoding padding, z.padding >= 0
|
||
|
// When decoding a real stream, z.padding == -1
|
||
|
func (z *Reader) decode() (ret xzRet) {
|
||
|
if z.padding >= 0 {
|
||
|
// read all padding in input buffer
|
||
|
for z.buf.inPos < len(z.buf.in) &&
|
||
|
z.buf.in[z.buf.inPos] == 0 {
|
||
|
z.buf.inPos++
|
||
|
z.padding++
|
||
|
}
|
||
|
switch {
|
||
|
case z.buf.inPos == len(z.buf.in) && z.rEOF:
|
||
|
// case: out of padding. no more input data available
|
||
|
if z.padding%4 != 0 {
|
||
|
ret = xzDataError
|
||
|
} else {
|
||
|
ret = xzStreamEnd
|
||
|
}
|
||
|
case z.buf.inPos == len(z.buf.in):
|
||
|
// case: read more padding next loop iteration
|
||
|
ret = xzOK
|
||
|
default:
|
||
|
// case: out of padding. more input data available
|
||
|
if z.padding%4 != 0 {
|
||
|
ret = xzDataError
|
||
|
} else {
|
||
|
xzDecReset(z.dec)
|
||
|
ret = xzStreamEnd
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
ret = xzDecRun(z.dec, z.buf)
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (z *Reader) Read(p []byte) (n int, err error) {
|
||
|
// restore err
|
||
|
err = z.err
|
||
|
// set decoder output buffer to p
|
||
|
z.buf.out = p
|
||
|
z.buf.outPos = 0
|
||
|
for {
|
||
|
// update n
|
||
|
n = z.buf.outPos
|
||
|
// if last call to decoder ended with an error, return that error
|
||
|
if err != nil {
|
||
|
break
|
||
|
}
|
||
|
// if decoder has finished, return with err == io.EOF
|
||
|
if z.dEOF {
|
||
|
err = io.EOF
|
||
|
break
|
||
|
}
|
||
|
// if p full, return with err == nil, unless we have not yet
|
||
|
// read the stream header with Read(nil)
|
||
|
if n == len(p) && z.CheckType != checkUnset {
|
||
|
break
|
||
|
}
|
||
|
// if needed, read more data from z.r
|
||
|
if z.buf.inPos == len(z.buf.in) && !z.rEOF {
|
||
|
rn, e := z.r.Read(z.in[:])
|
||
|
if e != nil && e != io.EOF {
|
||
|
// read error
|
||
|
err = e
|
||
|
break
|
||
|
}
|
||
|
if e == io.EOF {
|
||
|
z.rEOF = true
|
||
|
}
|
||
|
// set new input buffer in z.buf
|
||
|
z.buf.in = z.in[:rn]
|
||
|
z.buf.inPos = 0
|
||
|
}
|
||
|
// decode more data
|
||
|
ret := z.decode()
|
||
|
switch ret {
|
||
|
case xzOK:
|
||
|
// no action needed
|
||
|
case xzStreamEnd:
|
||
|
if z.padding >= 0 {
|
||
|
z.padding = -1
|
||
|
if !z.multistream || z.rEOF {
|
||
|
z.dEOF = true
|
||
|
}
|
||
|
} else {
|
||
|
z.padding = 0
|
||
|
}
|
||
|
case xzUnsupportedCheck:
|
||
|
err = ErrUnsupportedCheck
|
||
|
case xzMemlimitError:
|
||
|
err = ErrMemlimit
|
||
|
case xzFormatError:
|
||
|
err = ErrFormat
|
||
|
case xzOptionsError:
|
||
|
err = ErrOptions
|
||
|
case xzDataError:
|
||
|
err = ErrData
|
||
|
case xzBufError:
|
||
|
err = ErrBuf
|
||
|
}
|
||
|
// save err
|
||
|
z.err = err
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Multistream controls whether the reader is operating in multistream
|
||
|
// mode.
|
||
|
//
|
||
|
// If enabled (the default), the Reader expects the input to be a
|
||
|
// sequence of XZ streams, possibly interspersed with stream padding,
|
||
|
// which it reads one after another. The effect is that the
|
||
|
// concatenation of a sequence of XZ streams or XZ files is
|
||
|
// treated as equivalent to the compressed result of the concatenation
|
||
|
// of the sequence. This is standard behaviour for XZ readers.
|
||
|
//
|
||
|
// Calling Multistream(false) disables this behaviour; disabling the
|
||
|
// behaviour can be useful when reading file formats that distinguish
|
||
|
// individual XZ streams. In this mode, when the Reader reaches the
|
||
|
// end of the stream, Read returns io.EOF. To start the next stream,
|
||
|
// call z.Reset(nil) followed by z.Multistream(false). If there is no
|
||
|
// next stream, z.Reset(nil) will return io.EOF.
|
||
|
func (z *Reader) Multistream(ok bool) {
|
||
|
z.multistream = ok
|
||
|
}
|
||
|
|
||
|
// Reset, for non-nil values of io.Reader r, discards the Reader z's
|
||
|
// state and makes it equivalent to the result of its original state
|
||
|
// from NewReader, but reading from r instead. This permits reusing a
|
||
|
// Reader rather than allocating a new one.
|
||
|
//
|
||
|
// If you wish to leave r unchanged use z.Reset(nil). This keeps r
|
||
|
// unchanged and ensures internal buffering is preserved. If the
|
||
|
// Reader was at the end of a stream it is then ready to read any
|
||
|
// follow on streams. If there are no follow on streams z.Reset(nil)
|
||
|
// returns io.EOF. If the Reader was not at the end of a stream then
|
||
|
// z.Reset(nil) does nothing.
|
||
|
func (z *Reader) Reset(r io.Reader) error {
|
||
|
switch {
|
||
|
case r == nil:
|
||
|
z.multistream = true
|
||
|
if !z.dEOF {
|
||
|
return nil
|
||
|
}
|
||
|
if z.rEOF {
|
||
|
return io.EOF
|
||
|
}
|
||
|
z.dEOF = false
|
||
|
_, err := z.Read(nil) // read stream header
|
||
|
return err
|
||
|
default:
|
||
|
z.r = r
|
||
|
z.multistream = true
|
||
|
z.rEOF = false
|
||
|
z.dEOF = false
|
||
|
z.padding = -1
|
||
|
z.buf.in = nil
|
||
|
z.buf.inPos = 0
|
||
|
xzDecReset(z.dec)
|
||
|
z.err = nil
|
||
|
_, err := z.Read(nil) // read stream header
|
||
|
return err
|
||
|
}
|
||
|
}
|