// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> // All rights reserved. // // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. package table import ( "encoding/binary" "errors" "fmt" "io" "github.com/golang/snappy" "github.com/syndtr/goleveldb/leveldb/comparer" "github.com/syndtr/goleveldb/leveldb/filter" "github.com/syndtr/goleveldb/leveldb/opt" "github.com/syndtr/goleveldb/leveldb/util" ) func sharedPrefixLen(a, b []byte) int { i, n := 0, len(a) if n > len(b) { n = len(b) } for i < n && a[i] == b[i] { i++ } return i } type blockWriter struct { restartInterval int buf util.Buffer nEntries int prevKey []byte restarts []uint32 scratch []byte } func (w *blockWriter) append(key, value []byte) { nShared := 0 if w.nEntries%w.restartInterval == 0 { w.restarts = append(w.restarts, uint32(w.buf.Len())) } else { nShared = sharedPrefixLen(w.prevKey, key) } n := binary.PutUvarint(w.scratch[0:], uint64(nShared)) n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared)) n += binary.PutUvarint(w.scratch[n:], uint64(len(value))) w.buf.Write(w.scratch[:n]) w.buf.Write(key[nShared:]) w.buf.Write(value) w.prevKey = append(w.prevKey[:0], key...) w.nEntries++ } func (w *blockWriter) finish() { // Write restarts entry. if w.nEntries == 0 { // Must have at least one restart entry. w.restarts = append(w.restarts, 0) } w.restarts = append(w.restarts, uint32(len(w.restarts))) for _, x := range w.restarts { buf4 := w.buf.Alloc(4) binary.LittleEndian.PutUint32(buf4, x) } } func (w *blockWriter) reset() { w.buf.Reset() w.nEntries = 0 w.restarts = w.restarts[:0] } func (w *blockWriter) bytesLen() int { restartsLen := len(w.restarts) if restartsLen == 0 { restartsLen = 1 } return w.buf.Len() + 4*restartsLen + 4 } type filterWriter struct { generator filter.FilterGenerator buf util.Buffer nKeys int offsets []uint32 } func (w *filterWriter) add(key []byte) { if w.generator == nil { return } w.generator.Add(key) w.nKeys++ } func (w *filterWriter) flush(offset uint64) { if w.generator == nil { return } for x := int(offset / filterBase); x > len(w.offsets); { w.generate() } } func (w *filterWriter) finish() { if w.generator == nil { return } // Generate last keys. if w.nKeys > 0 { w.generate() } w.offsets = append(w.offsets, uint32(w.buf.Len())) for _, x := range w.offsets { buf4 := w.buf.Alloc(4) binary.LittleEndian.PutUint32(buf4, x) } w.buf.WriteByte(filterBaseLg) } func (w *filterWriter) generate() { // Record offset. w.offsets = append(w.offsets, uint32(w.buf.Len())) // Generate filters. if w.nKeys > 0 { w.generator.Generate(&w.buf) w.nKeys = 0 } } // Writer is a table writer. type Writer struct { writer io.Writer err error // Options cmp comparer.Comparer filter filter.Filter compression opt.Compression blockSize int dataBlock blockWriter indexBlock blockWriter filterBlock filterWriter pendingBH blockHandle offset uint64 nEntries int // Scratch allocated enough for 5 uvarint. Block writer should not use // first 20-bytes since it will be used to encode block handle, which // then passed to the block writer itself. scratch [50]byte comparerScratch []byte compressionScratch []byte } func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) { // Compress the buffer if necessary. var b []byte if compression == opt.SnappyCompression { // Allocate scratch enough for compression and block trailer. if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n { w.compressionScratch = make([]byte, n) } compressed := snappy.Encode(w.compressionScratch, buf.Bytes()) n := len(compressed) b = compressed[:n+blockTrailerLen] b[n] = blockTypeSnappyCompression } else { tmp := buf.Alloc(blockTrailerLen) tmp[0] = blockTypeNoCompression b = buf.Bytes() } // Calculate the checksum. n := len(b) - 4 checksum := util.NewCRC(b[:n]).Value() binary.LittleEndian.PutUint32(b[n:], checksum) // Write the buffer to the file. _, err = w.writer.Write(b) if err != nil { return } bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)} w.offset += uint64(len(b)) return } func (w *Writer) flushPendingBH(key []byte) { if w.pendingBH.length == 0 { return } var separator []byte if len(key) == 0 { separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey) } else { separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key) } if separator == nil { separator = w.dataBlock.prevKey } else { w.comparerScratch = separator } n := encodeBlockHandle(w.scratch[:20], w.pendingBH) // Append the block handle to the index block. w.indexBlock.append(separator, w.scratch[:n]) // Reset prev key of the data block. w.dataBlock.prevKey = w.dataBlock.prevKey[:0] // Clear pending block handle. w.pendingBH = blockHandle{} } func (w *Writer) finishBlock() error { w.dataBlock.finish() bh, err := w.writeBlock(&w.dataBlock.buf, w.compression) if err != nil { return err } w.pendingBH = bh // Reset the data block. w.dataBlock.reset() // Flush the filter block. w.filterBlock.flush(w.offset) return nil } // Append appends key/value pair to the table. The keys passed must // be in increasing order. // // It is safe to modify the contents of the arguments after Append returns. func (w *Writer) Append(key, value []byte) error { if w.err != nil { return w.err } if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 { w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key) return w.err } w.flushPendingBH(key) // Append key/value pair to the data block. w.dataBlock.append(key, value) // Add key to the filter block. w.filterBlock.add(key) // Finish the data block if block size target reached. if w.dataBlock.bytesLen() >= w.blockSize { if err := w.finishBlock(); err != nil { w.err = err return w.err } } w.nEntries++ return nil } // BlocksLen returns number of blocks written so far. func (w *Writer) BlocksLen() int { n := w.indexBlock.nEntries if w.pendingBH.length > 0 { // Includes the pending block. n++ } return n } // EntriesLen returns number of entries added so far. func (w *Writer) EntriesLen() int { return w.nEntries } // BytesLen returns number of bytes written so far. func (w *Writer) BytesLen() int { return int(w.offset) } // Close will finalize the table. Calling Append is not possible // after Close, but calling BlocksLen, EntriesLen and BytesLen // is still possible. func (w *Writer) Close() error { if w.err != nil { return w.err } // Write the last data block. Or empty data block if there // aren't any data blocks at all. if w.dataBlock.nEntries > 0 || w.nEntries == 0 { if err := w.finishBlock(); err != nil { w.err = err return w.err } } w.flushPendingBH(nil) // Write the filter block. var filterBH blockHandle w.filterBlock.finish() if buf := &w.filterBlock.buf; buf.Len() > 0 { filterBH, w.err = w.writeBlock(buf, opt.NoCompression) if w.err != nil { return w.err } } // Write the metaindex block. if filterBH.length > 0 { key := []byte("filter." + w.filter.Name()) n := encodeBlockHandle(w.scratch[:20], filterBH) w.dataBlock.append(key, w.scratch[:n]) } w.dataBlock.finish() metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression) if err != nil { w.err = err return w.err } // Write the index block. w.indexBlock.finish() indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression) if err != nil { w.err = err return w.err } // Write the table footer. footer := w.scratch[:footerLen] for i := range footer { footer[i] = 0 } n := encodeBlockHandle(footer, metaindexBH) encodeBlockHandle(footer[n:], indexBH) copy(footer[footerLen-len(magic):], magic) if _, err := w.writer.Write(footer); err != nil { w.err = err return w.err } w.offset += footerLen w.err = errors.New("leveldb/table: writer is closed") return nil } // NewWriter creates a new initialized table writer for the file. // // Table writer is not safe for concurrent use. func NewWriter(f io.Writer, o *opt.Options) *Writer { w := &Writer{ writer: f, cmp: o.GetComparer(), filter: o.GetFilter(), compression: o.GetCompression(), blockSize: o.GetBlockSize(), comparerScratch: make([]byte, 0), } // data block w.dataBlock.restartInterval = o.GetBlockRestartInterval() // The first 20-bytes are used for encoding block handle. w.dataBlock.scratch = w.scratch[20:] // index block w.indexBlock.restartInterval = 1 w.indexBlock.scratch = w.scratch[20:] // filter block if w.filter != nil { w.filterBlock.generator = w.filter.NewGenerator() w.filterBlock.flush(0) } return w }