Use commit graph files for listing pages (#7314)
* Experimental support for git commit graph files and bloom filter index Signed-off-by: Filip Navara <filip.navara@gmail.com> * Force vendor of commitgraph Signed-off-by: Filip Navara <filip.navara@gmail.com> * Remove bloom filter experiment and debug prints * Remove old code for building commit graphs * Remove unused function * Remove mmap usage * gofmt * sort vendor/modules.txt * Add copyright header and log commit-graph error
tokarchuk/v1.17
parent
e728b55812
commit
6e2a59e4ce
@ -0,0 +1,35 @@ |
||||
// Copyright 2019 The Gitea Authors.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package git |
||||
|
||||
import ( |
||||
"os" |
||||
"path" |
||||
|
||||
gitealog "code.gitea.io/gitea/modules/log" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" |
||||
cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" |
||||
) |
||||
|
||||
// CommitNodeIndex returns the index for walking commit graph
|
||||
func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { |
||||
indexPath := path.Join(r.Path, "objects", "info", "commit-graph") |
||||
|
||||
file, err := os.Open(indexPath) |
||||
if err == nil { |
||||
var index commitgraph.Index |
||||
index, err = commitgraph.OpenFileIndex(file) |
||||
if err == nil { |
||||
return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file |
||||
} |
||||
} |
||||
|
||||
if !os.IsNotExist(err) { |
||||
gitealog.Warn("Unable to read commit-graph for %s: %v", r.Path, err) |
||||
} |
||||
|
||||
return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil |
||||
} |
@ -0,0 +1,35 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"time" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
) |
||||
|
||||
// CommitData is a reduced representation of Commit as presented in the commit graph
// file. It is merely useful as an optimization for walking the commit graphs.
type CommitData struct {
	// TreeHash is the hash of the root tree of the commit.
	TreeHash plumbing.Hash
	// ParentIndexes are the indexes of the parent commits of the commit.
	ParentIndexes []int
	// ParentHashes are the hashes of the parent commits of the commit.
	ParentHashes []plumbing.Hash
	// Generation is the pre-computed generation number in the commit graph,
	// or zero if not available.
	Generation int
	// When is the timestamp of the commit.
	When time.Time
}
||||
|
||||
// Index is a representation of a commit graph that allows indexed
// access to the nodes using the commit object hash.
type Index interface {
	// GetIndexByHash gets the index in the commit graph from commit hash, if available
	GetIndexByHash(h plumbing.Hash) (int, error)
	// GetCommitDataByIndex gets the commit data from the commit graph using
	// an index obtained from GetIndexByHash or from a child node's
	// ParentIndexes, if available
	GetCommitDataByIndex(i int) (*CommitData, error)
	// Hashes returns all the hashes that are available in the index
	Hashes() []plumbing.Hash
}
@ -0,0 +1,103 @@ |
||||
// Package commitgraph implements encoding and decoding of commit-graph files.
|
||||
//
|
||||
// Git commit graph format
|
||||
// =======================
|
||||
//
|
||||
// The Git commit graph stores a list of commit OIDs and some associated
|
||||
// metadata, including:
|
||||
//
|
||||
// - The generation number of the commit. Commits with no parents have
|
||||
// generation number 1; commits with parents have generation number
|
||||
// one more than the maximum generation number of its parents. We
|
||||
// reserve zero as special, and can be used to mark a generation
|
||||
// number invalid or as "not computed".
|
||||
//
|
||||
// - The root tree OID.
|
||||
//
|
||||
// - The commit date.
|
||||
//
|
||||
// - The parents of the commit, stored using positional references within
|
||||
// the graph file.
|
||||
//
|
||||
// These positional references are stored as unsigned 32-bit integers
|
||||
// corresponding to the array position within the list of commit OIDs. Due
|
||||
// to some special constants we use to track parents, we can store at most
|
||||
// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits.
|
||||
//
|
||||
// == Commit graph files have the following format:
|
||||
//
|
||||
// In order to allow extensions that add extra data to the graph, we organize
|
||||
// the body into "chunks" and provide a binary lookup table at the beginning
|
||||
// of the body. The header includes certain values, such as number of chunks
|
||||
// and hash type.
|
||||
//
|
||||
// All 4-byte numbers are in network order.
|
||||
//
|
||||
// HEADER:
|
||||
//
|
||||
// 4-byte signature:
|
||||
// The signature is: {'C', 'G', 'P', 'H'}
|
||||
//
|
||||
// 1-byte version number:
|
||||
// Currently, the only valid version is 1.
|
||||
//
|
||||
// 1-byte Hash Version (1 = SHA-1)
|
||||
// We infer the hash length (H) from this value.
|
||||
//
|
||||
// 1-byte number (C) of "chunks"
|
||||
//
|
||||
// 1-byte (reserved for later use)
|
||||
// Current clients should ignore this value.
|
||||
//
|
||||
// CHUNK LOOKUP:
|
||||
//
|
||||
// (C + 1) * 12 bytes listing the table of contents for the chunks:
|
||||
// First 4 bytes describe the chunk id. Value 0 is a terminating label.
|
||||
// Other 8 bytes provide the byte-offset in current file for chunk to
|
||||
// start. (Chunks are ordered contiguously in the file, so you can infer
|
||||
// the length using the next chunk position if necessary.) Each chunk
|
||||
// ID appears at most once.
|
||||
//
|
||||
// The remaining data in the body is described one chunk at a time, and
|
||||
// these chunks may be given in any order. Chunks are required unless
|
||||
// otherwise specified.
|
||||
//
|
||||
// CHUNK DATA:
|
||||
//
|
||||
// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
|
||||
// The ith entry, F[i], stores the number of OIDs with first
|
||||
// byte at most i. Thus F[255] stores the total
|
||||
// number of commits (N).
|
||||
//
|
||||
// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
|
||||
// The OIDs for all commits in the graph, sorted in ascending order.
|
||||
//
|
||||
// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
|
||||
// * The first H bytes are for the OID of the root tree.
|
||||
// * The next 8 bytes are for the positions of the first two parents
|
||||
// of the ith commit. Stores value 0x70000000 if no parent in that
|
||||
// position. If there are more than two parents, the second value
|
||||
// has its most-significant bit on and the other bits store an array
|
||||
// position into the Extra Edge List chunk.
|
||||
// * The next 8 bytes store the generation number of the commit and
|
||||
// the commit time in seconds since EPOCH. The generation number
|
||||
// uses the higher 30 bits of the first 4 bytes, while the commit
|
||||
// time uses the 32 bits of the second 4 bytes, along with the lowest
|
||||
// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
|
||||
// commit time.
|
||||
//
|
||||
// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
|
||||
// This list of 4-byte values store the second through nth parents for
|
||||
// all octopus merges. The second parent value in the commit data stores
|
||||
// an array position within this list along with the most-significant bit
|
||||
// on. Starting at that array position, iterate through this list of commit
|
||||
// positions for the parents until reaching a value with the most-significant
|
||||
// bit on. The other bits correspond to the position of the last parent.
|
||||
//
|
||||
// TRAILER:
|
||||
//
|
||||
// H-byte HASH-checksum of all of the above.
|
||||
//
|
||||
// Source:
|
||||
// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt
|
||||
package commitgraph |
@ -0,0 +1,190 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"crypto/sha1" |
||||
"hash" |
||||
"io" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
"gopkg.in/src-d/go-git.v4/utils/binary" |
||||
) |
||||
|
||||
// Encoder serializes an Index to an output stream in the commit-graph file
// format. Everything written through the embedded Writer is also fed into
// hash, which supplies the trailing checksum.
type Encoder struct {
	io.Writer
	hash hash.Hash
}

// NewEncoder returns an Encoder whose output goes to w while a SHA-1 digest
// of the same bytes is accumulated alongside.
func NewEncoder(w io.Writer) *Encoder {
	digest := sha1.New()
	return &Encoder{
		Writer: io.MultiWriter(w, digest),
		hash:   digest,
	}
}
||||
|
||||
// Encode writes an index into the commit-graph file
|
||||
func (e *Encoder) Encode(idx Index) error { |
||||
var err error |
||||
|
||||
// Get all the hashes in the input index
|
||||
hashes := idx.Hashes() |
||||
|
||||
// Sort the inout and prepare helper structures we'll need for encoding
|
||||
hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes) |
||||
|
||||
chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} |
||||
chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} |
||||
if extraEdgesCount > 0 { |
||||
chunkSignatures = append(chunkSignatures, extraEdgeListSignature) |
||||
chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4) |
||||
} |
||||
|
||||
if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { |
||||
return err |
||||
} |
||||
if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { |
||||
return err |
||||
} |
||||
if err = e.encodeFanout(fanout); err != nil { |
||||
return err |
||||
} |
||||
if err = e.encodeOidLookup(hashes); err != nil { |
||||
return err |
||||
} |
||||
if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil { |
||||
if err = e.encodeExtraEdges(extraEdges); err != nil { |
||||
return err |
||||
} |
||||
} |
||||
if err != nil { |
||||
return err |
||||
} |
||||
return e.encodeChecksum() |
||||
} |
||||
|
||||
func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) { |
||||
// Sort the hashes and build our index
|
||||
plumbing.HashesSort(hashes) |
||||
hashToIndex = make(map[plumbing.Hash]uint32) |
||||
fanout = make([]uint32, 256) |
||||
for i, hash := range hashes { |
||||
hashToIndex[hash] = uint32(i) |
||||
fanout[hash[0]]++ |
||||
} |
||||
|
||||
// Convert the fanout to cumulative values
|
||||
for i := 1; i <= 0xff; i++ { |
||||
fanout[i] += fanout[i-1] |
||||
} |
||||
|
||||
// Find out if we will need extra edge table
|
||||
for i := 0; i < len(hashes); i++ { |
||||
v, _ := idx.GetCommitDataByIndex(i) |
||||
if len(v.ParentHashes) > 2 { |
||||
extraEdgesCount += uint32(len(v.ParentHashes) - 1) |
||||
break |
||||
} |
||||
} |
||||
|
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { |
||||
if _, err = e.Write(commitFileSignature); err == nil { |
||||
_, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { |
||||
// 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator
|
||||
offset := uint64(8 + len(chunkSignatures)*12 + 12) |
||||
for i, signature := range chunkSignatures { |
||||
if _, err = e.Write(signature); err == nil { |
||||
err = binary.WriteUint64(e, offset) |
||||
} |
||||
if err != nil { |
||||
return |
||||
} |
||||
offset += chunkSizes[i] |
||||
} |
||||
if _, err = e.Write(lastSignature); err == nil { |
||||
err = binary.WriteUint64(e, offset) |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeFanout(fanout []uint32) (err error) { |
||||
for i := 0; i <= 0xff; i++ { |
||||
if err = binary.WriteUint32(e, fanout[i]); err != nil { |
||||
return |
||||
} |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { |
||||
for _, hash := range hashes { |
||||
if _, err = e.Write(hash[:]); err != nil { |
||||
return err |
||||
} |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) { |
||||
for _, hash := range hashes { |
||||
origIndex, _ := idx.GetIndexByHash(hash) |
||||
commitData, _ := idx.GetCommitDataByIndex(origIndex) |
||||
if _, err = e.Write(commitData.TreeHash[:]); err != nil { |
||||
return |
||||
} |
||||
|
||||
var parent1, parent2 uint32 |
||||
if len(commitData.ParentHashes) == 0 { |
||||
parent1 = parentNone |
||||
parent2 = parentNone |
||||
} else if len(commitData.ParentHashes) == 1 { |
||||
parent1 = hashToIndex[commitData.ParentHashes[0]] |
||||
parent2 = parentNone |
||||
} else if len(commitData.ParentHashes) == 2 { |
||||
parent1 = hashToIndex[commitData.ParentHashes[0]] |
||||
parent2 = hashToIndex[commitData.ParentHashes[1]] |
||||
} else if len(commitData.ParentHashes) > 2 { |
||||
parent1 = hashToIndex[commitData.ParentHashes[0]] |
||||
parent2 = uint32(len(extraEdges)) | parentOctopusUsed |
||||
for _, parentHash := range commitData.ParentHashes[1:] { |
||||
extraEdges = append(extraEdges, hashToIndex[parentHash]) |
||||
} |
||||
extraEdges[len(extraEdges)-1] |= parentLast |
||||
} |
||||
|
||||
if err = binary.WriteUint32(e, parent1); err == nil { |
||||
err = binary.WriteUint32(e, parent2) |
||||
} |
||||
if err != nil { |
||||
return |
||||
} |
||||
|
||||
unixTime := uint64(commitData.When.Unix()) |
||||
unixTime |= uint64(commitData.Generation) << 34 |
||||
if err = binary.WriteUint64(e, unixTime); err != nil { |
||||
return |
||||
} |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { |
||||
for _, parent := range extraEdges { |
||||
if err = binary.WriteUint32(e, parent); err != nil { |
||||
return |
||||
} |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (e *Encoder) encodeChecksum() error { |
||||
_, err := e.Write(e.hash.Sum(nil)[:20]) |
||||
return err |
||||
} |
@ -0,0 +1,259 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"bytes" |
||||
encbin "encoding/binary" |
||||
"errors" |
||||
"io" |
||||
"time" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
"gopkg.in/src-d/go-git.v4/utils/binary" |
||||
) |
||||
|
||||
var (
	// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
	// file version is not supported.
	ErrUnsupportedVersion = errors.New("Unsupported version")
	// ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
	// hash function is not supported. Currently only SHA-1 is defined and
	// supported
	ErrUnsupportedHash = errors.New("Unsupported hash algorithm")
	// ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
	// graph file is corrupted.
	ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file")

	// Four-byte identifiers of the file itself and of the chunks inside it.
	// lastSignature terminates the chunk lookup table.
	commitFileSignature    = []byte{'C', 'G', 'P', 'H'}
	oidFanoutSignature     = []byte{'O', 'I', 'D', 'F'}
	oidLookupSignature     = []byte{'O', 'I', 'D', 'L'}
	commitDataSignature    = []byte{'C', 'D', 'A', 'T'}
	extraEdgeListSignature = []byte{'E', 'D', 'G', 'E'}
	lastSignature          = []byte{0, 0, 0, 0}
)

// Fixed values of the file format; constants so they cannot be reassigned.
const (
	// parentNone marks an empty parent slot in a commit data record.
	parentNone = uint32(0x70000000)
	// parentOctopusUsed is set on the second parent slot when the remaining
	// parents are stored in the extra edge list.
	parentOctopusUsed = uint32(0x80000000)
	// parentOctopusMask strips the flag bit and leaves the position.
	parentOctopusMask = uint32(0x7fffffff)
	// parentLast is set on the final extra edge list entry of a commit.
	parentLast = uint32(0x80000000)
)
||||
|
||||
// fileIndex is the Index implementation returned by OpenFileIndex. It reads
// records from the underlying commit-graph file on demand.
type fileIndex struct {
	// reader gives random access to the serialized commit-graph file.
	reader io.ReaderAt
	// fanout holds the OID Fanout chunk: fanout[i] is the number of OIDs whose
	// first byte is at most i, so fanout[0xff] is the total number of commits.
	fanout [256]int
	// The fields below are byte offsets of the chunks within reader, as read
	// from the chunk lookup table. A value of zero means the chunk was not
	// listed.
	oidFanoutOffset     int64
	oidLookupOffset     int64
	commitDataOffset    int64
	extraEdgeListOffset int64
}
||||
|
||||
// OpenFileIndex opens a serialized commit graph file in the format described at
|
||||
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
|
||||
func OpenFileIndex(reader io.ReaderAt) (Index, error) { |
||||
fi := &fileIndex{reader: reader} |
||||
|
||||
if err := fi.verifyFileHeader(); err != nil { |
||||
return nil, err |
||||
} |
||||
if err := fi.readChunkHeaders(); err != nil { |
||||
return nil, err |
||||
} |
||||
if err := fi.readFanout(); err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return fi, nil |
||||
} |
||||
|
||||
func (fi *fileIndex) verifyFileHeader() error { |
||||
// Verify file signature
|
||||
var signature = make([]byte, 4) |
||||
if _, err := fi.reader.ReadAt(signature, 0); err != nil { |
||||
return err |
||||
} |
||||
if !bytes.Equal(signature, commitFileSignature) { |
||||
return ErrMalformedCommitGraphFile |
||||
} |
||||
|
||||
// Read and verify the file header
|
||||
var header = make([]byte, 4) |
||||
if _, err := fi.reader.ReadAt(header, 4); err != nil { |
||||
return err |
||||
} |
||||
if header[0] != 1 { |
||||
return ErrUnsupportedVersion |
||||
} |
||||
if header[1] != 1 { |
||||
return ErrUnsupportedHash |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (fi *fileIndex) readChunkHeaders() error { |
||||
var chunkID = make([]byte, 4) |
||||
for i := 0; ; i++ { |
||||
chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) |
||||
if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { |
||||
return err |
||||
} |
||||
chunkOffset, err := binary.ReadUint64(chunkHeader) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
if bytes.Equal(chunkID, oidFanoutSignature) { |
||||
fi.oidFanoutOffset = int64(chunkOffset) |
||||
} else if bytes.Equal(chunkID, oidLookupSignature) { |
||||
fi.oidLookupOffset = int64(chunkOffset) |
||||
} else if bytes.Equal(chunkID, commitDataSignature) { |
||||
fi.commitDataOffset = int64(chunkOffset) |
||||
} else if bytes.Equal(chunkID, extraEdgeListSignature) { |
||||
fi.extraEdgeListOffset = int64(chunkOffset) |
||||
} else if bytes.Equal(chunkID, lastSignature) { |
||||
break |
||||
} |
||||
} |
||||
|
||||
if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.commitDataOffset <= 0 { |
||||
return ErrMalformedCommitGraphFile |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (fi *fileIndex) readFanout() error { |
||||
fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4) |
||||
for i := 0; i < 256; i++ { |
||||
fanoutValue, err := binary.ReadUint32(fanoutReader) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
if fanoutValue > 0x7fffffff { |
||||
return ErrMalformedCommitGraphFile |
||||
} |
||||
fi.fanout[i] = int(fanoutValue) |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) { |
||||
var oid plumbing.Hash |
||||
|
||||
// Find the hash in the oid lookup table
|
||||
var low int |
||||
if h[0] == 0 { |
||||
low = 0 |
||||
} else { |
||||
low = fi.fanout[h[0]-1] |
||||
} |
||||
high := fi.fanout[h[0]] |
||||
for low < high { |
||||
mid := (low + high) >> 1 |
||||
offset := fi.oidLookupOffset + int64(mid)*20 |
||||
if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { |
||||
return 0, err |
||||
} |
||||
cmp := bytes.Compare(h[:], oid[:]) |
||||
if cmp < 0 { |
||||
high = mid |
||||
} else if cmp == 0 { |
||||
return mid, nil |
||||
} else { |
||||
low = mid + 1 |
||||
} |
||||
} |
||||
|
||||
return 0, plumbing.ErrObjectNotFound |
||||
} |
||||
|
||||
func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) { |
||||
if idx >= fi.fanout[0xff] { |
||||
return nil, plumbing.ErrObjectNotFound |
||||
} |
||||
|
||||
offset := fi.commitDataOffset + int64(idx)*36 |
||||
commitDataReader := io.NewSectionReader(fi.reader, offset, 36) |
||||
|
||||
treeHash, err := binary.ReadHash(commitDataReader) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
parent1, err := binary.ReadUint32(commitDataReader) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
parent2, err := binary.ReadUint32(commitDataReader) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
genAndTime, err := binary.ReadUint64(commitDataReader) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
var parentIndexes []int |
||||
if parent2&parentOctopusUsed == parentOctopusUsed { |
||||
// Octopus merge
|
||||
parentIndexes = []int{int(parent1 & parentOctopusMask)} |
||||
offset := fi.extraEdgeListOffset + 4*int64(parent2&parentOctopusMask) |
||||
buf := make([]byte, 4) |
||||
for { |
||||
_, err := fi.reader.ReadAt(buf, offset) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
parent := encbin.BigEndian.Uint32(buf) |
||||
offset += 4 |
||||
parentIndexes = append(parentIndexes, int(parent&parentOctopusMask)) |
||||
if parent&parentLast == parentLast { |
||||
break |
||||
} |
||||
} |
||||
} else if parent2 != parentNone { |
||||
parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)} |
||||
} else if parent1 != parentNone { |
||||
parentIndexes = []int{int(parent1 & parentOctopusMask)} |
||||
} |
||||
|
||||
parentHashes, err := fi.getHashesFromIndexes(parentIndexes) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return &CommitData{ |
||||
TreeHash: treeHash, |
||||
ParentIndexes: parentIndexes, |
||||
ParentHashes: parentHashes, |
||||
Generation: int(genAndTime >> 34), |
||||
When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), |
||||
}, nil |
||||
} |
||||
|
||||
func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) { |
||||
hashes := make([]plumbing.Hash, len(indexes)) |
||||
|
||||
for i, idx := range indexes { |
||||
if idx >= fi.fanout[0xff] { |
||||
return nil, ErrMalformedCommitGraphFile |
||||
} |
||||
|
||||
offset := fi.oidLookupOffset + int64(idx)*20 |
||||
if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { |
||||
return nil, err |
||||
} |
||||
} |
||||
|
||||
return hashes, nil |
||||
} |
||||
|
||||
// Hashes returns all the hashes that are available in the index
|
||||
func (fi *fileIndex) Hashes() []plumbing.Hash { |
||||
hashes := make([]plumbing.Hash, fi.fanout[0xff]) |
||||
for i := 0; i < int(fi.fanout[0xff]); i++ { |
||||
offset := fi.oidLookupOffset + int64(i)*20 |
||||
if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 { |
||||
return nil |
||||
} |
||||
} |
||||
return hashes |
||||
} |
@ -0,0 +1,72 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
) |
||||
|
||||
// MemoryIndex provides a way to build the commit-graph in memory
// for later encoding to file.
type MemoryIndex struct {
	// commitData holds the nodes in the order they were added.
	commitData []*CommitData
	// indexMap maps a commit hash to its position in commitData.
	indexMap map[plumbing.Hash]int
}
||||
|
||||
// NewMemoryIndex creates in-memory commit graph representation
|
||||
func NewMemoryIndex() *MemoryIndex { |
||||
return &MemoryIndex{ |
||||
indexMap: make(map[plumbing.Hash]int), |
||||
} |
||||
} |
||||
|
||||
// GetIndexByHash gets the index in the commit graph from commit hash, if available
|
||||
func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) { |
||||
i, ok := mi.indexMap[h] |
||||
if ok { |
||||
return i, nil |
||||
} |
||||
|
||||
return 0, plumbing.ErrObjectNotFound |
||||
} |
||||
|
||||
// GetCommitDataByIndex gets the commit node from the commit graph using index
|
||||
// obtained from child node, if available
|
||||
func (mi *MemoryIndex) GetCommitDataByIndex(i int) (*CommitData, error) { |
||||
if int(i) >= len(mi.commitData) { |
||||
return nil, plumbing.ErrObjectNotFound |
||||
} |
||||
|
||||
commitData := mi.commitData[i] |
||||
|
||||
// Map parent hashes to parent indexes
|
||||
if commitData.ParentIndexes == nil { |
||||
parentIndexes := make([]int, len(commitData.ParentHashes)) |
||||
for i, parentHash := range commitData.ParentHashes { |
||||
var err error |
||||
if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { |
||||
return nil, err |
||||
} |
||||
} |
||||
commitData.ParentIndexes = parentIndexes |
||||
} |
||||
|
||||
return commitData, nil |
||||
} |
||||
|
||||
// Hashes returns all the hashes that are available in the index
|
||||
func (mi *MemoryIndex) Hashes() []plumbing.Hash { |
||||
hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) |
||||
for k := range mi.indexMap { |
||||
hashes = append(hashes, k) |
||||
} |
||||
return hashes |
||||
} |
||||
|
||||
// Add adds new node to the memory index
|
||||
func (mi *MemoryIndex) Add(hash plumbing.Hash, commitData *CommitData) { |
||||
// The parent indexes are calculated lazily in GetNodeByIndex
|
||||
// which allows adding nodes out of order as long as all parents
|
||||
// are eventually resolved
|
||||
commitData.ParentIndexes = nil |
||||
mi.indexMap[hash] = len(mi.commitData) |
||||
mi.commitData = append(mi.commitData, commitData) |
||||
} |
@ -0,0 +1,98 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"io" |
||||
"time" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/object" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/storer" |
||||
) |
||||
|
||||
// CommitNode is a generic interface encapsulating a lightweight commit object
// retrieved from a CommitNodeIndex.
type CommitNode interface {
	// ID returns the Commit object id referenced by the commit graph node.
	ID() plumbing.Hash
	// Tree returns the Tree referenced by the commit graph node.
	Tree() (*object.Tree, error)
	// CommitTime returns the Committer.When time of the Commit referenced by the commit graph node.
	CommitTime() time.Time
	// NumParents returns the number of parents in a commit.
	NumParents() int
	// ParentNodes returns a CommitNodeIter for the parents of the node.
	ParentNodes() CommitNodeIter
	// ParentNode returns the ith parent of a commit.
	ParentNode(i int) (CommitNode, error)
	// ParentHashes returns the hashes of the parent commits of the node.
	ParentHashes() []plumbing.Hash
	// Generation returns the generation of the commit for reachability analysis.
	// Objects with newer generation are not reachable from objects of older generation.
	Generation() uint64
	// Commit returns the full commit object from the node
	Commit() (*object.Commit, error)
}
||||
|
||||
// CommitNodeIndex is a generic interface encapsulating an index of CommitNode
// objects.
type CommitNodeIndex interface {
	// Get returns a commit node from a commit hash
	Get(hash plumbing.Hash) (CommitNode, error)
}
||||
|
||||
// CommitNodeIter is a generic closable interface for iterating over commit nodes.
type CommitNodeIter interface {
	// Next returns the next commit node of the iteration.
	Next() (CommitNode, error)
	// ForEach calls the given function for the commit nodes of the iteration.
	ForEach(func(CommitNode) error) error
	// Close ends the iteration.
	Close()
}
||||
|
||||
// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex.
type parentCommitNodeIter struct {
	// node is the commit whose parents are iterated.
	node CommitNode
	// i is the position of the parent that Next returns next.
	i int
}
||||
|
||||
func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { |
||||
return &parentCommitNodeIter{node, 0} |
||||
} |
||||
|
||||
// Next moves the iterator to the next commit and returns a pointer to it. If
|
||||
// there are no more commits, it returns io.EOF.
|
||||
func (iter *parentCommitNodeIter) Next() (CommitNode, error) { |
||||
obj, err := iter.node.ParentNode(iter.i) |
||||
if err == object.ErrParentNotFound { |
||||
return nil, io.EOF |
||||
} |
||||
if err == nil { |
||||
iter.i++ |
||||
} |
||||
|
||||
return obj, err |
||||
} |
||||
|
||||
// ForEach call the cb function for each commit contained on this iter until
|
||||
// an error appends or the end of the iter is reached. If ErrStop is sent
|
||||
// the iteration is stopped but no error is returned. The iterator is closed.
|
||||
func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { |
||||
for { |
||||
obj, err := iter.Next() |
||||
if err != nil { |
||||
if err == io.EOF { |
||||
return nil |
||||
} |
||||
|
||||
return err |
||||
} |
||||
|
||||
if err := cb(obj); err != nil { |
||||
if err == storer.ErrStop { |
||||
return nil |
||||
} |
||||
|
||||
return err |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Close does nothing; it is present so that parentCommitNodeIter satisfies
// CommitNodeIter.
func (iter *parentCommitNodeIter) Close() {
}
131
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go
generated
vendored
131
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go
generated
vendored
@ -0,0 +1,131 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"fmt" |
||||
"time" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/object" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/storer" |
||||
) |
||||
|
||||
// graphCommitNode is a reduced representation of Commit as presented in the commit
// graph file (commitgraph.CommitData). It is merely useful as an optimization for
// walking the commit graphs.
//
// graphCommitNode implements the CommitNode interface.
type graphCommitNode struct {
	// Hash for the Commit object
	hash plumbing.Hash
	// Index of the node in the commit graph file
	index int

	// commitData is the record loaded from the commit graph for this node.
	commitData *commitgraph.CommitData
	// gci is the index this node was loaded from; it is used to load parents
	// and to reach the object storage.
	gci *graphCommitNodeIndex
}
||||
|
||||
// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit
// graph files and the object store.
//
// graphCommitNodeIndex implements the CommitNodeIndex interface
type graphCommitNodeIndex struct {
	// commitGraph is consulted first when looking up a commit.
	commitGraph commitgraph.Index
	// s is the object storage used for trees, full commits and as a fallback
	// for commits missing from commitGraph.
	s storer.EncodedObjectStorer
}
||||
|
||||
// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph
|
||||
// files as backing storage and falls back to object storage when necessary
|
||||
func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { |
||||
return &graphCommitNodeIndex{commitGraph, s} |
||||
} |
||||
|
||||
func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { |
||||
// Check the commit graph first
|
||||
parentIndex, err := gci.commitGraph.GetIndexByHash(hash) |
||||
if err == nil { |
||||
parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return &graphCommitNode{ |
||||
hash: hash, |
||||
index: parentIndex, |
||||
commitData: parent, |
||||
gci: gci, |
||||
}, nil |
||||
} |
||||
|
||||
// Fallback to loading full commit object
|
||||
commit, err := object.GetCommit(gci.s, hash) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return &objectCommitNode{ |
||||
nodeIndex: gci, |
||||
commit: commit, |
||||
}, nil |
||||
} |
||||
|
||||
func (c *graphCommitNode) ID() plumbing.Hash { |
||||
return c.hash |
||||
} |
||||
|
||||
func (c *graphCommitNode) Tree() (*object.Tree, error) { |
||||
return object.GetTree(c.gci.s, c.commitData.TreeHash) |
||||
} |
||||
|
||||
func (c *graphCommitNode) CommitTime() time.Time { |
||||
return c.commitData.When |
||||
} |
||||
|
||||
func (c *graphCommitNode) NumParents() int { |
||||
return len(c.commitData.ParentIndexes) |
||||
} |
||||
|
||||
func (c *graphCommitNode) ParentNodes() CommitNodeIter { |
||||
return newParentgraphCommitNodeIter(c) |
||||
} |
||||
|
||||
func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { |
||||
if i < 0 || i >= len(c.commitData.ParentIndexes) { |
||||
return nil, object.ErrParentNotFound |
||||
} |
||||
|
||||
parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return &graphCommitNode{ |
||||
hash: c.commitData.ParentHashes[i], |
||||
index: c.commitData.ParentIndexes[i], |
||||
commitData: parent, |
||||
gci: c.gci, |
||||
}, nil |
||||
} |
||||
|
||||
func (c *graphCommitNode) ParentHashes() []plumbing.Hash { |
||||
return c.commitData.ParentHashes |
||||
} |
||||
|
||||
func (c *graphCommitNode) Generation() uint64 { |
||||
// If the commit-graph file was generated with older Git version that
|
||||
// set the generation to zero for every commit the generation assumption
|
||||
// is still valid. It is just less useful.
|
||||
return uint64(c.commitData.Generation) |
||||
} |
||||
|
||||
func (c *graphCommitNode) Commit() (*object.Commit, error) { |
||||
return object.GetCommit(c.gci.s, c.hash) |
||||
} |
||||
|
||||
func (c *graphCommitNode) String() string { |
||||
return fmt.Sprintf( |
||||
"%s %s\nDate: %s", |
||||
plumbing.CommitObject, c.ID(), |
||||
c.CommitTime().Format(object.DateFormat), |
||||
) |
||||
} |
90
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go
generated
vendored
90
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go
generated
vendored
@ -0,0 +1,90 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"math" |
||||
"time" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/object" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/storer" |
||||
) |
||||
|
||||
// objectCommitNode is a representation of Commit as presented in the GIT object format.
|
||||
//
|
||||
// objectCommitNode implements the CommitNode interface.
|
||||
type objectCommitNode struct { |
||||
nodeIndex CommitNodeIndex |
||||
commit *object.Commit |
||||
} |
||||
|
||||
// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses
|
||||
// only object storage to load the nodes
|
||||
func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { |
||||
return &objectCommitNodeIndex{s} |
||||
} |
||||
|
||||
func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { |
||||
commit, err := object.GetCommit(oci.s, hash) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return &objectCommitNode{ |
||||
nodeIndex: oci, |
||||
commit: commit, |
||||
}, nil |
||||
} |
||||
|
||||
// objectCommitNodeIndex is an index that can load CommitNode objects only from the
|
||||
// object store.
|
||||
//
|
||||
// objectCommitNodeIndex implements the CommitNodeIndex interface
|
||||
type objectCommitNodeIndex struct { |
||||
s storer.EncodedObjectStorer |
||||
} |
||||
|
||||
func (c *objectCommitNode) CommitTime() time.Time { |
||||
return c.commit.Committer.When |
||||
} |
||||
|
||||
func (c *objectCommitNode) ID() plumbing.Hash { |
||||
return c.commit.ID() |
||||
} |
||||
|
||||
func (c *objectCommitNode) Tree() (*object.Tree, error) { |
||||
return c.commit.Tree() |
||||
} |
||||
|
||||
func (c *objectCommitNode) NumParents() int { |
||||
return c.commit.NumParents() |
||||
} |
||||
|
||||
func (c *objectCommitNode) ParentNodes() CommitNodeIter { |
||||
return newParentgraphCommitNodeIter(c) |
||||
} |
||||
|
||||
func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { |
||||
if i < 0 || i >= len(c.commit.ParentHashes) { |
||||
return nil, object.ErrParentNotFound |
||||
} |
||||
|
||||
// Note: It's necessary to go through CommitNodeIndex here to ensure
|
||||
// that if the commit-graph file covers only part of the history we
|
||||
// start using it when that part is reached.
|
||||
return c.nodeIndex.Get(c.commit.ParentHashes[i]) |
||||
} |
||||
|
||||
func (c *objectCommitNode) ParentHashes() []plumbing.Hash { |
||||
return c.commit.ParentHashes |
||||
} |
||||
|
||||
func (c *objectCommitNode) Generation() uint64 { |
||||
// Commit nodes representing objects outside of the commit graph can never
|
||||
// be reached by objects from the commit-graph thus we return the highest
|
||||
// possible value.
|
||||
return math.MaxUint64 |
||||
} |
||||
|
||||
func (c *objectCommitNode) Commit() (*object.Commit, error) { |
||||
return c.commit, nil |
||||
} |
105
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go
generated
vendored
105
vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go
generated
vendored
@ -0,0 +1,105 @@ |
||||
package commitgraph |
||||
|
||||
import ( |
||||
"io" |
||||
|
||||
"github.com/emirpasic/gods/trees/binaryheap" |
||||
|
||||
"gopkg.in/src-d/go-git.v4/plumbing" |
||||
"gopkg.in/src-d/go-git.v4/plumbing/storer" |
||||
) |
||||
|
||||
type commitNodeIteratorByCTime struct { |
||||
heap *binaryheap.Heap |
||||
seenExternal map[plumbing.Hash]bool |
||||
seen map[plumbing.Hash]bool |
||||
} |
||||
|
||||
// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history,
|
||||
// starting at the given commit and visiting its parents while preserving Committer Time order.
|
||||
// this appears to be the closest order to `git log`
|
||||
// The given callback will be called for each visited commit. Each commit will
|
||||
// be visited only once. If the callback returns an error, walking will stop
|
||||
// and will return the error. Other errors might be returned if the history
|
||||
// cannot be traversed (e.g. missing objects). Ignore allows to skip some
|
||||
// commits from being iterated.
|
||||
func NewCommitNodeIterCTime( |
||||
c CommitNode, |
||||
seenExternal map[plumbing.Hash]bool, |
||||
ignore []plumbing.Hash, |
||||
) CommitNodeIter { |
||||
seen := make(map[plumbing.Hash]bool) |
||||
for _, h := range ignore { |
||||
seen[h] = true |
||||
} |
||||
|
||||
heap := binaryheap.NewWith(func(a, b interface{}) int { |
||||
if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { |
||||
return 1 |
||||
} |
||||
return -1 |
||||
}) |
||||
|
||||
heap.Push(c) |
||||
|
||||
return &commitNodeIteratorByCTime{ |
||||
heap: heap, |
||||
seenExternal: seenExternal, |
||||
seen: seen, |
||||
} |
||||
} |
||||
|
||||
func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { |
||||
var c CommitNode |
||||
for { |
||||
cIn, ok := w.heap.Pop() |
||||
if !ok { |
||||
return nil, io.EOF |
||||
} |
||||
c = cIn.(CommitNode) |
||||
cID := c.ID() |
||||
|
||||
if w.seen[cID] || w.seenExternal[cID] { |
||||
continue |
||||
} |
||||
|
||||
w.seen[cID] = true |
||||
|
||||
for i, h := range c.ParentHashes() { |
||||
if w.seen[h] || w.seenExternal[h] { |
||||
continue |
||||
} |
||||
pc, err := c.ParentNode(i) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
w.heap.Push(pc) |
||||
} |
||||
|
||||
return c, nil |
||||
} |
||||
} |
||||
|
||||
func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { |
||||
for { |
||||
c, err := w.Next() |
||||
if err == io.EOF { |
||||
break |
||||
} |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
err = cb(c) |
||||
if err == storer.ErrStop { |
||||
break |
||||
} |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (w *commitNodeIteratorByCTime) Close() {} |
@ -0,0 +1,7 @@ |
||||
// Package commitgraph provides an interface for efficient traversal over Git
|
||||
// commit graph either through the regular object storage, or optionally with
|
||||
// the index stored in commit-graph file (Git 2.18+).
|
||||
//
|
||||
// The API and functionality of this package are considered EXPERIMENTAL and are
|
||||
// not considered stable nor production ready.
|
||||
package commitgraph |
Loading…
Reference in new issue