Use commit graph files for listing pages (#7314)

* Experimental support for git commit graph files and bloom filter index

Signed-off-by: Filip Navara <filip.navara@gmail.com>

* Force vendor of commitgraph

Signed-off-by: Filip Navara <filip.navara@gmail.com>

* Remove bloom filter experiment and debug prints

* Remove old code for building commit graphs

* Remove unused function

* Remove mmap usage

* gofmt

* sort vendor/modules.txt

* Add copyright header and log commit-graph error
tokarchuk/v1.17
Filip Navara 6 years ago committed by Lunny Xiao
parent e728b55812
commit 6e2a59e4ce
  1. 38
      modules/git/commit_info.go
  2. 12
      modules/git/notes.go
  3. 35
      modules/git/repo_commitgraph.go
  4. 35
      vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go
  5. 103
      vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go
  6. 190
      vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go
  7. 259
      vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go
  8. 72
      vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go
  9. 98
      vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go
  10. 131
      vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go
  11. 90
      vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go
  12. 105
      vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go
  13. 7
      vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go
  14. 4
      vendor/modules.txt

@ -8,6 +8,7 @@ import (
"github.com/emirpasic/gods/trees/binaryheap"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph"
)
// GetCommitsInfo gets information of all commits that are corresponding to these entries
@ -19,7 +20,12 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom
entryPaths[i+1] = entry.Name()
}
c, err := commit.repo.gogitRepo.CommitObject(plumbing.Hash(commit.ID))
commitNodeIndex, commitGraphFile := commit.repo.CommitNodeIndex()
if commitGraphFile != nil {
defer commitGraphFile.Close()
}
c, err := commitNodeIndex.Get(plumbing.Hash(commit.ID))
if err != nil {
return nil, nil, err
}
@ -69,14 +75,14 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom
}
type commitAndPaths struct {
commit *object.Commit
commit cgobject.CommitNode
// Paths that are still on the branch represented by commit
paths []string
// Set of hashes for the paths
hashes map[string]plumbing.Hash
}
func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) {
func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) {
tree, err := c.Tree()
if err != nil {
return nil, err
@ -93,7 +99,7 @@ func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) {
return tree, nil
}
func getFileHashes(c *object.Commit, treePath string, paths []string) (map[string]plumbing.Hash, error) {
func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) {
tree, err := getCommitTree(c, treePath)
if err == object.ErrDirectoryNotFound {
// The whole tree didn't exist, so return empty map
@ -118,16 +124,16 @@ func getFileHashes(c *object.Commit, treePath string, paths []string) (map[strin
return hashes, nil
}
func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (map[string]*object.Commit, error) {
func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) {
// We do a tree traversal with nodes sorted by commit time
heap := binaryheap.NewWith(func(a, b interface{}) int {
if a.(*commitAndPaths).commit.Committer.When.Before(b.(*commitAndPaths).commit.Committer.When) {
if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) {
return 1
}
return -1
})
result := make(map[string]*object.Commit)
resultNodes := make(map[string]cgobject.CommitNode)
initialHashes, err := getFileHashes(c, treePath, paths)
if err != nil {
return nil, err
@ -145,9 +151,9 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
// Load the parent commits for the one we are currently examining
numParents := current.commit.NumParents()
var parents []*object.Commit
var parents []cgobject.CommitNode
for i := 0; i < numParents; i++ {
parent, err := current.commit.Parent(i)
parent, err := current.commit.ParentNode(i)
if err != nil {
break
}
@ -174,7 +180,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
for i, path := range current.paths {
// The results could already contain some newer change for the same path,
// so don't override that and bail out on the file early.
if result[path] == nil {
if resultNodes[path] == nil {
if pathUnchanged[i] {
// The path existed with the same hash in at least one parent so it could
// not have been changed in this commit directly.
@ -188,7 +194,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
// - We are looking at a merge commit and the hash of the file doesn't
// match any of the hashes being merged. This is more common for directories,
// but it can also happen if a file is changed through conflict resolution.
result[path] = current.commit
resultNodes[path] = current.commit
}
}
}
@ -222,5 +228,15 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
}
}
// Post-processing
result := make(map[string]*object.Commit)
for path, commitNode := range resultNodes {
var err error
result[path], err = commitNode.Commit()
if err != nil {
return nil, err
}
}
return result, nil
}

@ -50,7 +50,17 @@ func GetNote(repo *Repository, commitID string, note *Note) error {
return err
}
lastCommits, err := getLastCommitForPaths(commit, "", []string{commitID})
commitNodeIndex, commitGraphFile := repo.CommitNodeIndex()
if commitGraphFile != nil {
defer commitGraphFile.Close()
}
commitNode, err := commitNodeIndex.Get(commit.Hash)
if err != nil {
return nil
}
lastCommits, err := getLastCommitForPaths(commitNode, "", []string{commitID})
if err != nil {
return err
}

@ -0,0 +1,35 @@
// Copyright 2019 The Gitea Authors.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"os"
"path"
gitealog "code.gitea.io/gitea/modules/log"
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph"
)
// CommitNodeIndex returns the index used for walking the commit graph.
//
// When the repository has a readable objects/info/commit-graph file, the
// returned index is backed by that file and the open file handle is returned
// as the second value; the caller is responsible for closing it once the index
// is no longer needed. Otherwise an index that only uses the object store is
// returned together with a nil file.
func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) {
	indexPath := path.Join(r.Path, "objects", "info", "commit-graph")

	file, err := os.Open(indexPath)
	if err == nil {
		var index commitgraph.Index
		index, err = commitgraph.OpenFileIndex(file)
		if err == nil {
			return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file
		}
		// The file was opened but could not be parsed. The handle is not
		// returned to the caller on this path, so it must be released here
		// or it leaks. The parse error is the one worth reporting, so the
		// Close error is intentionally ignored.
		_ = file.Close()
	}

	// A missing commit-graph file is the normal case and is not worth a log
	// entry; anything else (permissions, corruption, ...) is.
	if !os.IsNotExist(err) {
		gitealog.Warn("Unable to read commit-graph for %s: %v", r.Path, err)
	}

	return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil
}

@ -0,0 +1,35 @@
package commitgraph
import (
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
)
// CommitData is a reduced representation of a commit as presented in the
// commit graph file. It is merely useful as an optimization for walking the
// commit graphs.
type CommitData struct {
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
// ParentIndexes are the indexes of the parent commits of the commit.
ParentIndexes []int
// ParentHashes are the hashes of the parent commits of the commit.
ParentHashes []plumbing.Hash
// Generation is the pre-computed generation number of the commit in the
// commit graph, or zero if not available.
Generation int
// When is the timestamp of the commit.
When time.Time
}
// Index is a representation of the commit graph that allows indexed access to
// its nodes using the commit object hash.
type Index interface {
// GetIndexByHash gets the index in the commit graph from commit hash, if available
GetIndexByHash(h plumbing.Hash) (int, error)
// GetCommitDataByIndex gets the commit data from the commit graph using an
// index obtained from GetIndexByHash or from the ParentIndexes of a child
// node, if available
GetCommitDataByIndex(i int) (*CommitData, error)
// Hashes returns all the hashes that are available in the index
Hashes() []plumbing.Hash
}

@ -0,0 +1,103 @@
// Package commitgraph implements encoding and decoding of commit-graph files.
//
// Git commit graph format
// =======================
//
// The Git commit graph stores a list of commit OIDs and some associated
// metadata, including:
//
// - The generation number of the commit. Commits with no parents have
// generation number 1; commits with parents have generation number
// one more than the maximum generation number of its parents. We
// reserve zero as special, and can be used to mark a generation
// number invalid or as "not computed".
//
// - The root tree OID.
//
// - The commit date.
//
// - The parents of the commit, stored using positional references within
// the graph file.
//
// These positional references are stored as unsigned 32-bit integers
// corresponding to the array position within the list of commit OIDs. Due
// to some special constants we use to track parents, we can store at most
// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits.
//
// == Commit graph files have the following format:
//
// In order to allow extensions that add extra data to the graph, we organize
// the body into "chunks" and provide a binary lookup table at the beginning
// of the body. The header includes certain values, such as number of chunks
// and hash type.
//
// All 4-byte numbers are in network order.
//
// HEADER:
//
// 4-byte signature:
// The signature is: {'C', 'G', 'P', 'H'}
//
// 1-byte version number:
// Currently, the only valid version is 1.
//
// 1-byte Hash Version (1 = SHA-1)
// We infer the hash length (H) from this value.
//
// 1-byte number (C) of "chunks"
//
// 1-byte (reserved for later use)
// Current clients should ignore this value.
//
// CHUNK LOOKUP:
//
// (C + 1) * 12 bytes listing the table of contents for the chunks:
// First 4 bytes describe the chunk id. Value 0 is a terminating label.
// Other 8 bytes provide the byte-offset in current file for chunk to
// start. (Chunks are ordered contiguously in the file, so you can infer
// the length using the next chunk position if necessary.) Each chunk
// ID appears at most once.
//
// The remaining data in the body is described one chunk at a time, and
// these chunks may be given in any order. Chunks are required unless
// otherwise specified.
//
// CHUNK DATA:
//
// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
// The ith entry, F[i], stores the number of OIDs with first
// byte at most i. Thus F[255] stores the total
// number of commits (N).
//
// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
// The OIDs for all commits in the graph, sorted in ascending order.
//
// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
// * The first H bytes are for the OID of the root tree.
// * The next 8 bytes are for the positions of the first two parents
// of the ith commit. Stores value 0x70000000 if no parent in that
// position. If there are more than two parents, the second value
// has its most-significant bit on and the other bits store an array
// position into the Extra Edge List chunk.
// * The next 8 bytes store the generation number of the commit and
// the commit time in seconds since EPOCH. The generation number
// uses the higher 30 bits of the first 4 bytes, while the commit
// time uses the 32 bits of the second 4 bytes, along with the lowest
// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
// commit time.
//
// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
// This list of 4-byte values store the second through nth parents for
// all octopus merges. The second parent value in the commit data stores
// an array position within this list along with the most-significant bit
// on. Starting at that array position, iterate through this list of commit
// positions for the parents until reaching a value with the most-significant
// bit on. The other bits correspond to the position of the last parent.
//
// TRAILER:
//
// H-byte HASH-checksum of all of the above.
//
// Source:
// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt
package commitgraph

@ -0,0 +1,190 @@
package commitgraph
import (
"crypto/sha1"
"hash"
"io"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
// Encoder writes an Index (for example a MemoryIndex) to an output stream in
// the commit-graph file format.
type Encoder struct {
// Writer receives the encoded bytes. NewEncoder sets it to a writer that
// forwards every write both to the destination and to hash.
io.Writer
// hash accumulates the checksum that encodeChecksum writes as the trailer.
hash hash.Hash
}
// NewEncoder returns a new stream encoder that writes to w. Every byte written
// through the encoder is also fed into a SHA-1 hash so that the checksum
// trailer can be produced at the end of encoding.
func NewEncoder(w io.Writer) *Encoder {
	checksum := sha1.New()
	return &Encoder{
		Writer: io.MultiWriter(w, checksum),
		hash:   checksum,
	}
}
// Encode writes idx to the underlying writer in the commit-graph file format:
// file header, chunk lookup table, OID fanout, OID lookup, commit data, the
// extra edge list and finally the checksum trailer.
//
// It returns the first error reported by any of the encoding steps.
func (e *Encoder) Encode(idx Index) error {
	// Get all the hashes in the input index
	hashes := idx.Hashes()

	// Sort the input and prepare helper structures we'll need for encoding
	hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes)

	// Chunk sizes: 256 4-byte fanout entries, one 20-byte hash per commit and
	// one 36-byte commit data record per commit.
	chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
	chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36}
	if extraEdgesCount > 0 {
		chunkSignatures = append(chunkSignatures, extraEdgeListSignature)
		chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4)
	}

	if err := e.encodeFileHeader(len(chunkSignatures)); err != nil {
		return err
	}
	if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
		return err
	}
	if err := e.encodeFanout(fanout); err != nil {
		return err
	}
	if err := e.encodeOidLookup(hashes); err != nil {
		return err
	}

	// The error must be checked in this scope: declaring it inside an
	// if-statement initializer would shadow it and let a failed commit data
	// write go unnoticed.
	extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx)
	if err != nil {
		return err
	}
	if err := e.encodeExtraEdges(extraEdges); err != nil {
		return err
	}

	return e.encodeChecksum()
}
// prepare sorts hashes in place and builds the helper structures needed for
// encoding:
//
//   - hashToIndex maps every hash to its position in the sorted list,
//   - fanout holds, for every possible first byte, the cumulative number of
//     hashes whose first byte is at most that value,
//   - extraEdgesCount is the number of entries the extra edge list needs, i.e.
//     every parent except the first one of each commit with more than two
//     parents.
func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) {
	// Sort the hashes and build our index
	plumbing.HashesSort(hashes)
	hashToIndex = make(map[plumbing.Hash]uint32, len(hashes))
	fanout = make([]uint32, 256)
	for i, hash := range hashes {
		hashToIndex[hash] = uint32(i)
		fanout[hash[0]]++
	}

	// Convert the fanout to cumulative values
	for i := 1; i <= 0xff; i++ {
		fanout[i] += fanout[i-1]
	}

	// Count the extra edge entries over ALL commits. Stopping at the first
	// octopus merge would under-report the size of the extra edge chunk
	// whenever the graph contains more than one such merge.
	for i := 0; i < len(hashes); i++ {
		v, err := idx.GetCommitDataByIndex(i)
		if err != nil || v == nil {
			// This function has no error result; an entry that cannot be
			// loaded contributes nothing to the count.
			continue
		}
		if n := len(v.ParentHashes); n > 2 {
			extraEdgesCount += uint32(n - 1)
		}
	}

	return hashToIndex, fanout, extraEdgesCount
}
// encodeFileHeader writes the file signature followed by the four header
// bytes: version 1, hash version 1, the number of chunks and a zero byte.
func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
	if _, err = e.Write(commitFileSignature); err != nil {
		return err
	}
	header := []byte{1, 1, byte(chunkCount), 0}
	_, err = e.Write(header)
	return err
}
// encodeChunkHeaders writes the chunk lookup table: one entry (4-byte
// signature plus 8-byte file offset) per chunk, followed by a terminating
// entry whose offset points just past the last chunk.
func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) {
	// 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator
	offset := uint64(8 + len(chunkSignatures)*12 + 12)

	for i := range chunkSignatures {
		if _, err = e.Write(chunkSignatures[i]); err != nil {
			return err
		}
		if err = binary.WriteUint64(e, offset); err != nil {
			return err
		}
		offset += chunkSizes[i]
	}

	if _, err = e.Write(lastSignature); err != nil {
		return err
	}
	return binary.WriteUint64(e, offset)
}
// encodeFanout writes the first 256 fanout entries as 4-byte values, stopping
// at the first write error.
func (e *Encoder) encodeFanout(fanout []uint32) (err error) {
	for i := 0; i < 256; i++ {
		err = binary.WriteUint32(e, fanout[i])
		if err != nil {
			return err
		}
	}
	return err
}
// encodeOidLookup writes the raw bytes of every hash, in the order given.
func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) {
	for i := range hashes {
		oid := hashes[i]
		if _, err = e.Write(oid[:]); err != nil {
			return err
		}
	}
	return err
}
// encodeCommitData writes one commit data record per hash, in the order of
// hashes: the root tree hash, the positions of the first two parents and a
// 64-bit value combining the generation number (shifted left by 34 bits) with
// the commit time in seconds.
//
// Parents are referenced by their position in the sorted hash list as given by
// hashToIndex. For commits with more than two parents, the second parent slot
// holds a position into the extra edge list with the octopus flag set, and all
// parents except the first are appended to the returned extraEdges, the last
// one flagged with parentLast.
//
// It returns an error if a commit cannot be looked up in idx or if writing
// fails.
func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) {
	for _, hash := range hashes {
		// Lookup failures are reported to the caller; using the results
		// regardless would dereference a nil commit data pointer.
		origIndex, lookupErr := idx.GetIndexByHash(hash)
		if lookupErr != nil {
			return nil, lookupErr
		}
		commitData, lookupErr := idx.GetCommitDataByIndex(origIndex)
		if lookupErr != nil {
			return nil, lookupErr
		}

		if _, err = e.Write(commitData.TreeHash[:]); err != nil {
			return extraEdges, err
		}

		parents := commitData.ParentHashes
		parent1, parent2 := parentNone, parentNone
		switch {
		case len(parents) == 1:
			parent1 = hashToIndex[parents[0]]
		case len(parents) == 2:
			parent1 = hashToIndex[parents[0]]
			parent2 = hashToIndex[parents[1]]
		case len(parents) > 2:
			parent1 = hashToIndex[parents[0]]
			parent2 = uint32(len(extraEdges)) | parentOctopusUsed
			for _, parentHash := range parents[1:] {
				extraEdges = append(extraEdges, hashToIndex[parentHash])
			}
			extraEdges[len(extraEdges)-1] |= parentLast
		}

		if err = binary.WriteUint32(e, parent1); err != nil {
			return extraEdges, err
		}
		if err = binary.WriteUint32(e, parent2); err != nil {
			return extraEdges, err
		}

		unixTime := uint64(commitData.When.Unix())
		unixTime |= uint64(commitData.Generation) << 34
		if err = binary.WriteUint64(e, unixTime); err != nil {
			return extraEdges, err
		}
	}
	return extraEdges, nil
}
// encodeExtraEdges writes every entry of the extra edge list as a 4-byte
// value, stopping at the first write error.
func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) {
	for i := range extraEdges {
		err = binary.WriteUint32(e, extraEdges[i])
		if err != nil {
			return err
		}
	}
	return err
}
// encodeChecksum writes the first 20 bytes of the hash of everything written
// so far as the file trailer.
func (e *Encoder) encodeChecksum() error {
	digest := e.hash.Sum(nil)
	_, err := e.Write(digest[:20])
	return err
}

@ -0,0 +1,259 @@
package commitgraph
import (
"bytes"
encbin "encoding/binary"
"errors"
"io"
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
var (
// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
// file version is not supported.
ErrUnsupportedVersion = errors.New("Unsupported version")
// ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
// hash function is not supported. Currently only SHA-1 is defined and
// supported
ErrUnsupportedHash = errors.New("Unsupported hash algorithm")
// ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
// graph file is corrupted.
ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file")
// 4-byte signature at the very start of a commit graph file.
commitFileSignature = []byte{'C', 'G', 'P', 'H'}
// Chunk IDs used in the chunk lookup table.
oidFanoutSignature = []byte{'O', 'I', 'D', 'F'}
oidLookupSignature = []byte{'O', 'I', 'D', 'L'}
commitDataSignature = []byte{'C', 'D', 'A', 'T'}
extraEdgeListSignature = []byte{'E', 'D', 'G', 'E'}
// Chunk ID of the entry that terminates the chunk lookup table.
lastSignature = []byte{0, 0, 0, 0}
// parentNone is stored in a parent slot that has no parent.
parentNone = uint32(0x70000000)
// parentOctopusUsed is the bit set in the second parent slot when the
// remaining bits hold a position into the extra edge list.
parentOctopusUsed = uint32(0x80000000)
// parentOctopusMask strips the most-significant (flag) bit from a parent value.
parentOctopusMask = uint32(0x7fffffff)
// parentLast is the bit marking the last entry of a parent run in the
// extra edge list.
parentLast = uint32(0x80000000)
)
// fileIndex is the Index implementation returned by OpenFileIndex. It reads
// commit graph data on demand from reader.
type fileIndex struct {
// reader provides random access to the commit graph file contents.
reader io.ReaderAt
// fanout holds the OID fanout table as read by readFanout; fanout[255] is
// used as the total number of commits.
fanout [256]int
// Byte offsets of the individual chunks as read from the chunk lookup
// table. An offset left at zero means the chunk was not listed.
oidFanoutOffset int64
oidLookupOffset int64
commitDataOffset int64
extraEdgeListOffset int64
}
// OpenFileIndex opens a serialized commit graph file in the format described at
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
//
// The file header, the chunk lookup table and the fanout table are read up
// front, in that order; the first failure is returned and no index is created.
func OpenFileIndex(reader io.ReaderAt) (Index, error) {
	fi := &fileIndex{reader: reader}

	steps := []func() error{
		fi.verifyFileHeader,
		fi.readChunkHeaders,
		fi.readFanout,
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return nil, err
		}
	}

	return fi, nil
}
// verifyFileHeader checks the 4-byte file signature and then the version and
// hash version bytes that follow it. Only version 1 and hash version 1 are
// accepted.
func (fi *fileIndex) verifyFileHeader() error {
	// Verify file signature
	signature := make([]byte, 4)
	if _, err := fi.reader.ReadAt(signature, 0); err != nil {
		return err
	}
	if !bytes.Equal(signature, commitFileSignature) {
		return ErrMalformedCommitGraphFile
	}

	// Read and verify the file header
	header := make([]byte, 4)
	if _, err := fi.reader.ReadAt(header, 4); err != nil {
		return err
	}
	version, hashVersion := header[0], header[1]
	switch {
	case version != 1:
		return ErrUnsupportedVersion
	case hashVersion != 1:
		return ErrUnsupportedHash
	}

	return nil
}
// readChunkHeaders walks the chunk lookup table that starts at byte 8,
// recording the offset of every known chunk until the terminating entry is
// reached. Unknown chunk IDs are ignored. The fanout, lookup and commit data
// chunks are mandatory; if any of them is missing the file is reported as
// malformed.
func (fi *fileIndex) readChunkHeaders() error {
	chunkID := make([]byte, 4)
	for entry := int64(0); ; entry++ {
		chunkHeader := io.NewSectionReader(fi.reader, 8+entry*12, 12)
		if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil {
			return err
		}
		chunkOffset, err := binary.ReadUint64(chunkHeader)
		if err != nil {
			return err
		}

		if bytes.Equal(chunkID, lastSignature) {
			break
		}

		switch {
		case bytes.Equal(chunkID, oidFanoutSignature):
			fi.oidFanoutOffset = int64(chunkOffset)
		case bytes.Equal(chunkID, oidLookupSignature):
			fi.oidLookupOffset = int64(chunkOffset)
		case bytes.Equal(chunkID, commitDataSignature):
			fi.commitDataOffset = int64(chunkOffset)
		case bytes.Equal(chunkID, extraEdgeListSignature):
			fi.extraEdgeListOffset = int64(chunkOffset)
		}
	}

	missingRequired := fi.oidFanoutOffset <= 0 ||
		fi.oidLookupOffset <= 0 ||
		fi.commitDataOffset <= 0
	if missingRequired {
		return ErrMalformedCommitGraphFile
	}

	return nil
}
// readFanout loads the 256 fanout entries from the OID fanout chunk. Values
// above 0x7fffffff are rejected as a malformed file.
func (fi *fileIndex) readFanout() error {
	const maxFanout = 0x7fffffff

	fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4)
	for i := range fi.fanout {
		count, err := binary.ReadUint32(fanoutReader)
		if err != nil {
			return err
		}
		if count > maxFanout {
			return ErrMalformedCommitGraphFile
		}
		fi.fanout[i] = int(count)
	}
	return nil
}
// GetIndexByHash returns the position of h in the OID lookup table. The fanout
// table narrows the search to the hashes sharing the first byte of h, and a
// binary search over that range reads candidate hashes from the file. It
// returns plumbing.ErrObjectNotFound when h is not present.
func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
	firstByte := h[0]

	// Find the hash in the oid lookup table
	low := 0
	if firstByte != 0 {
		low = fi.fanout[firstByte-1]
	}
	high := fi.fanout[firstByte]

	var candidate plumbing.Hash
	for low < high {
		mid := (low + high) >> 1
		offset := fi.oidLookupOffset + int64(mid)*20
		if _, err := fi.reader.ReadAt(candidate[:], offset); err != nil {
			return 0, err
		}

		switch cmp := bytes.Compare(h[:], candidate[:]); {
		case cmp == 0:
			return mid, nil
		case cmp < 0:
			high = mid
		default:
			low = mid + 1
		}
	}

	return 0, plumbing.ErrObjectNotFound
}
// GetCommitDataByIndex reads the 36-byte commit data record at position idx
// and resolves the parent positions to hashes.
//
// It returns plumbing.ErrObjectNotFound when idx is outside the range of the
// index, ErrMalformedCommitGraphFile when the record refers to data the file
// does not contain, or the underlying read error.
func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) {
	if idx < 0 || idx >= fi.fanout[0xff] {
		return nil, plumbing.ErrObjectNotFound
	}

	offset := fi.commitDataOffset + int64(idx)*36
	commitDataReader := io.NewSectionReader(fi.reader, offset, 36)

	treeHash, err := binary.ReadHash(commitDataReader)
	if err != nil {
		return nil, err
	}
	parent1, err := binary.ReadUint32(commitDataReader)
	if err != nil {
		return nil, err
	}
	parent2, err := binary.ReadUint32(commitDataReader)
	if err != nil {
		return nil, err
	}
	genAndTime, err := binary.ReadUint64(commitDataReader)
	if err != nil {
		return nil, err
	}

	var parentIndexes []int
	switch {
	case parent2&parentOctopusUsed == parentOctopusUsed:
		// Octopus merge: the second slot is a position into the extra edge
		// list. Without that chunk the offset is zero and the loop below
		// would interpret the file header as parent positions.
		if fi.extraEdgeListOffset <= 0 {
			return nil, ErrMalformedCommitGraphFile
		}
		parentIndexes = []int{int(parent1 & parentOctopusMask)}
		edgeOffset := fi.extraEdgeListOffset + 4*int64(parent2&parentOctopusMask)
		buf := make([]byte, 4)
		for {
			if _, err := fi.reader.ReadAt(buf, edgeOffset); err != nil {
				return nil, err
			}
			parent := encbin.BigEndian.Uint32(buf)
			edgeOffset += 4
			parentIndexes = append(parentIndexes, int(parent&parentOctopusMask))
			// The last parent of the run has its most-significant bit set.
			if parent&parentLast == parentLast {
				break
			}
		}
	case parent2 != parentNone:
		parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)}
	case parent1 != parentNone:
		parentIndexes = []int{int(parent1 & parentOctopusMask)}
	}

	parentHashes, err := fi.getHashesFromIndexes(parentIndexes)
	if err != nil {
		return nil, err
	}

	// The generation number is stored above the low 34 bits, which hold the
	// commit time in seconds.
	return &CommitData{
		TreeHash:      treeHash,
		ParentIndexes: parentIndexes,
		ParentHashes:  parentHashes,
		Generation:    int(genAndTime >> 34),
		When:          time.Unix(int64(genAndTime&0x3FFFFFFFF), 0),
	}, nil
}
// getHashesFromIndexes reads the hash stored at each of the given positions of
// the OID lookup table. A position at or beyond the number of commits is
// reported as a malformed file.
func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) {
	total := fi.fanout[0xff]
	hashes := make([]plumbing.Hash, len(indexes))

	for i := range indexes {
		position := indexes[i]
		if position >= total {
			return nil, ErrMalformedCommitGraphFile
		}

		offset := fi.oidLookupOffset + int64(position)*20
		_, err := fi.reader.ReadAt(hashes[i][:], offset)
		if err != nil {
			return nil, err
		}
	}

	return hashes, nil
}
// Hashes returns all the hashes that are available in the index, in the order
// they are stored in the OID lookup table. It returns nil if any hash cannot
// be read completely.
func (fi *fileIndex) Hashes() []plumbing.Hash {
	hashes := make([]plumbing.Hash, fi.fanout[0xff])
	for i := range hashes {
		offset := fi.oidLookupOffset + int64(i)*20
		n, err := fi.reader.ReadAt(hashes[i][:], offset)
		if err != nil || n < 20 {
			return nil
		}
	}
	return hashes
}

@ -0,0 +1,72 @@
package commitgraph
import (
"gopkg.in/src-d/go-git.v4/plumbing"
)
// MemoryIndex provides a way to build the commit-graph in memory
// for later encoding to file.
type MemoryIndex struct {
// commitData holds the nodes in the order they were added.
commitData []*CommitData
// indexMap maps a commit hash to its position in commitData.
indexMap map[plumbing.Hash]int
}
// NewMemoryIndex creates an empty in-memory commit graph representation.
func NewMemoryIndex() *MemoryIndex {
	mi := new(MemoryIndex)
	mi.indexMap = make(map[plumbing.Hash]int)
	return mi
}
// GetIndexByHash gets the index in the commit graph from commit hash, if
// available. It returns plumbing.ErrObjectNotFound for an unknown hash.
func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
	if position, known := mi.indexMap[h]; known {
		return position, nil
	}
	return 0, plumbing.ErrObjectNotFound
}
// GetCommitDataByIndex gets the commit data from the commit graph using an
// index obtained from GetIndexByHash or from a child node, if available.
//
// Parent indexes are resolved from the parent hashes on first access and
// stored on the node; if a parent hash is not in the index the lookup error is
// returned and nothing is stored.
func (mi *MemoryIndex) GetCommitDataByIndex(i int) (*CommitData, error) {
	if i >= len(mi.commitData) {
		return nil, plumbing.ErrObjectNotFound
	}

	data := mi.commitData[i]
	if data.ParentIndexes != nil {
		return data, nil
	}

	// Map parent hashes to parent indexes
	resolved := make([]int, len(data.ParentHashes))
	for n, parentHash := range data.ParentHashes {
		parentIndex, err := mi.GetIndexByHash(parentHash)
		if err != nil {
			return nil, err
		}
		resolved[n] = parentIndex
	}
	data.ParentIndexes = resolved

	return data, nil
}
// Hashes returns all the hashes that are available in the index. The order of
// the result follows map iteration and is therefore unspecified.
func (mi *MemoryIndex) Hashes() []plumbing.Hash {
	hashes := make([]plumbing.Hash, len(mi.indexMap))
	next := 0
	for hash := range mi.indexMap {
		hashes[next] = hash
		next++
	}
	return hashes
}
// Add adds a new node to the memory index under the given hash.
func (mi *MemoryIndex) Add(hash plumbing.Hash, commitData *CommitData) {
	// The parent indexes are calculated lazily in GetCommitDataByIndex
	// which allows adding nodes out of order as long as all parents
	// are eventually resolved
	commitData.ParentIndexes = nil

	position := len(mi.commitData)
	mi.commitData = append(mi.commitData, commitData)
	mi.indexMap[hash] = position
}

@ -0,0 +1,98 @@
package commitgraph
import (
"io"
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
// CommitNode is a generic interface encapsulating a lightweight commit object
// retrieved from a CommitNodeIndex.
type CommitNode interface {
// ID returns the Commit object id referenced by the commit graph node.
ID() plumbing.Hash
// Tree returns the Tree referenced by the commit graph node.
Tree() (*object.Tree, error)
// CommitTime returns the Committer.When time of the Commit referenced by the commit graph node.
CommitTime() time.Time
// NumParents returns the number of parents in a commit.
NumParents() int
// ParentNodes return a CommitNodeIter for parents of specified node.
ParentNodes() CommitNodeIter
// ParentNode returns the ith parent of a commit.
ParentNode(i int) (CommitNode, error)
// ParentHashes returns hashes of the parent commits for a specified node
ParentHashes() []plumbing.Hash
// Generation returns the generation of the commit for reachability analysis.
// Objects with newer generation are not reachable from objects of older generation.
Generation() uint64
// Commit returns the full commit object from the node
Commit() (*object.Commit, error)
}
// CommitNodeIndex is a generic interface encapsulating an index of CommitNode
// objects.
type CommitNodeIndex interface {
// Get returns a commit node from a commit hash
Get(hash plumbing.Hash) (CommitNode, error)
}
// CommitNodeIter is a generic closable interface for iterating over commit nodes.
type CommitNodeIter interface {
// Next returns the next commit node. The implementation in this package
// returns io.EOF once there are no more nodes.
Next() (CommitNode, error)
// ForEach calls the given function for each remaining commit node.
ForEach(func(CommitNode) error) error
// Close releases the iterator.
Close()
}
// parentCommitNodeIter provides an iterator over the parent commits of a
// CommitNode.
type parentCommitNodeIter struct {
// node is the commit whose parents are iterated.
node CommitNode
// i is the position of the parent that the next call to Next returns.
i int
}
// newParentgraphCommitNodeIter returns an iterator over the parents of node,
// positioned at the first parent.
func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter {
	iter := &parentCommitNodeIter{node: node}
	return iter
}
// Next returns the next parent commit and advances the iterator. When there
// are no more parents it returns io.EOF. Any other error is returned as-is and
// the iterator does not advance.
func (iter *parentCommitNodeIter) Next() (CommitNode, error) {
	parent, err := iter.node.ParentNode(iter.i)
	switch err {
	case nil:
		iter.i++
	case object.ErrParentNotFound:
		return nil, io.EOF
	}
	return parent, err
}
// ForEach calls the cb function for each commit remaining in this iterator
// until an error happens or the end of the iterator is reached. If cb returns
// storer.ErrStop the iteration is stopped but no error is returned.
func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error {
	for {
		node, err := iter.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		switch cbErr := cb(node); cbErr {
		case nil:
			// keep iterating
		case storer.ErrStop:
			return nil
		default:
			return cbErr
		}
	}
}
// Close does nothing; it exists to satisfy the CommitNodeIter interface.
func (iter *parentCommitNodeIter) Close() {
}

@ -0,0 +1,131 @@
package commitgraph
import (
"fmt"
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
// graphCommitNode is a reduced representation of Commit as presented in the commit
// graph file (commitgraph.CommitData). It is merely useful as an optimization for
// walking the commit graphs.
//
// graphCommitNode implements the CommitNode interface.
type graphCommitNode struct {
// Hash for the Commit object
hash plumbing.Hash
// Index of the node in the commit graph file
index int
// Data for this commit as loaded from the commit graph
commitData *commitgraph.CommitData
// Index this node was loaded from; used to load parents, trees and the
// full commit object
gci *graphCommitNodeIndex
}
// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit
// graph files and the object store.
//
// graphCommitNodeIndex implements the CommitNodeIndex interface
type graphCommitNodeIndex struct {
// commitGraph is consulted first when looking up a commit.
commitGraph commitgraph.Index
// s is the object storage used for trees, full commit objects and commits
// that are not found in commitGraph.
s storer.EncodedObjectStorer
}
// NewGraphCommitNodeIndex returns a CommitNodeIndex implementation that uses
// commit-graph files as backing storage and falls back to object storage when
// necessary.
func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex {
	return &graphCommitNodeIndex{
		commitGraph: commitGraph,
		s:           s,
	}
}
// Get returns the commit node for hash. The commit graph is consulted first;
// if the hash cannot be looked up there, the full commit object is loaded from
// the object store instead.
func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) {
	// Check the commit graph first
	if graphIndex, lookupErr := gci.commitGraph.GetIndexByHash(hash); lookupErr == nil {
		data, err := gci.commitGraph.GetCommitDataByIndex(graphIndex)
		if err != nil {
			return nil, err
		}

		node := &graphCommitNode{
			hash:       hash,
			index:      graphIndex,
			commitData: data,
			gci:        gci,
		}
		return node, nil
	}

	// Fallback to loading full commit object
	commit, err := object.GetCommit(gci.s, hash)
	if err != nil {
		return nil, err
	}

	node := &objectCommitNode{
		nodeIndex: gci,
		commit:    commit,
	}
	return node, nil
}
// ID returns the hash of the commit this node refers to.
func (c *graphCommitNode) ID() plumbing.Hash {
return c.hash
}
// Tree loads the root tree of the commit from the object store, using the
// tree hash recorded in the commit graph.
func (c *graphCommitNode) Tree() (*object.Tree, error) {
return object.GetTree(c.gci.s, c.commitData.TreeHash)
}
// CommitTime returns the commit timestamp recorded in the commit graph.
func (c *graphCommitNode) CommitTime() time.Time {
return c.commitData.When
}
// NumParents returns the number of parents recorded in the commit graph.
func (c *graphCommitNode) NumParents() int {
return len(c.commitData.ParentIndexes)
}
// ParentNodes returns an iterator over the parents of this node.
func (c *graphCommitNode) ParentNodes() CommitNodeIter {
return newParentgraphCommitNodeIter(c)
}
// ParentNode returns the ith parent of the commit, loaded from the commit
// graph. It returns object.ErrParentNotFound when i is out of range.
func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) {
	parentIndexes := c.commitData.ParentIndexes
	if i < 0 || i >= len(parentIndexes) {
		return nil, object.ErrParentNotFound
	}

	parentIndex := parentIndexes[i]
	parentData, err := c.gci.commitGraph.GetCommitDataByIndex(parentIndex)
	if err != nil {
		return nil, err
	}

	parent := &graphCommitNode{
		hash:       c.commitData.ParentHashes[i],
		index:      parentIndex,
		commitData: parentData,
		gci:        c.gci,
	}
	return parent, nil
}
// ParentHashes returns the parent hashes recorded in the commit graph.
func (c *graphCommitNode) ParentHashes() []plumbing.Hash {
return c.commitData.ParentHashes
}
// Generation returns the generation number recorded in the commit graph.
func (c *graphCommitNode) Generation() uint64 {
// If the commit-graph file was generated with older Git version that
// set the generation to zero for every commit the generation assumption
// is still valid. It is just less useful.
return uint64(c.commitData.Generation)
}
// Commit loads the full commit object for this node from the object store.
func (c *graphCommitNode) Commit() (*object.Commit, error) {
return object.GetCommit(c.gci.s, c.hash)
}
// String returns a short description of the node: the object type and commit
// hash on the first line, and the commit time on the second.
func (c *graphCommitNode) String() string {
	date := c.CommitTime().Format(object.DateFormat)
	return fmt.Sprintf("%s %s\nDate: %s", plumbing.CommitObject, c.ID(), date)
}

@ -0,0 +1,90 @@
package commitgraph
import (
"math"
"time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
// objectCommitNode is a CommitNode backed by a full commit object, i.e. a
// commit as it is represented in the Git object format.
//
// objectCommitNode implements the CommitNode interface.
type objectCommitNode struct {
	// nodeIndex is the index used to resolve this commit's parents.
	nodeIndex CommitNodeIndex
	// commit is the loaded commit object the node wraps.
	commit *object.Commit
}
// NewObjectCommitNodeIndex returns a CommitNodeIndex implementation that loads
// nodes exclusively from the object storage s.
func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex {
	index := &objectCommitNodeIndex{s: s}
	return index
}
// Get loads the commit object identified by hash from the object storage and
// wraps it in an objectCommitNode bound to this index. Errors from loading
// the commit are returned unchanged.
func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) {
	loaded, err := object.GetCommit(oci.s, hash)
	if err != nil {
		return nil, err
	}

	node := &objectCommitNode{nodeIndex: oci, commit: loaded}
	return node, nil
}
// objectCommitNodeIndex is a CommitNodeIndex that can only load CommitNode
// objects from the object store.
//
// objectCommitNodeIndex implements the CommitNodeIndex interface.
type objectCommitNodeIndex struct {
	// s is the object storage commits are read from.
	s storer.EncodedObjectStorer
}
// CommitTime returns the committer timestamp of the wrapped commit.
func (c *objectCommitNode) CommitTime() time.Time {
	committer := c.commit.Committer
	return committer.When
}
// ID returns the ID reported by the wrapped commit object.
func (c *objectCommitNode) ID() plumbing.Hash {
	return c.commit.ID()
}
// Tree returns the result of calling Tree on the wrapped commit object.
func (c *objectCommitNode) Tree() (*object.Tree, error) {
	return c.commit.Tree()
}
// NumParents returns the parent count reported by the wrapped commit object.
func (c *objectCommitNode) NumParents() int {
	return c.commit.NumParents()
}
// ParentNodes returns the iterator that newParentgraphCommitNodeIter builds
// for this node.
func (c *objectCommitNode) ParentNodes() CommitNodeIter {
	return newParentgraphCommitNodeIter(c)
}
// ParentNode returns the i-th parent of the wrapped commit, or
// object.ErrParentNotFound when i is out of range.
func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) {
	parents := c.commit.ParentHashes
	if i < 0 || i >= len(parents) {
		return nil, object.ErrParentNotFound
	}

	// The parent must be resolved through the CommitNodeIndex: when the
	// commit-graph file covers only part of the history, this is what lets
	// the walk switch over to it as soon as the covered part is reached.
	return c.nodeIndex.Get(parents[i])
}
// ParentHashes returns the parent hashes of the wrapped commit. The slice is
// returned as stored, without copying.
func (c *objectCommitNode) ParentHashes() []plumbing.Hash {
	return c.commit.ParentHashes
}
// Generation always returns math.MaxUint64 for object-backed nodes.
func (c *objectCommitNode) Generation() uint64 {
	// A node for an object outside the commit-graph can never be reached from
	// objects inside the commit-graph, so the highest possible value is
	// reported.
	const outsideGraph uint64 = math.MaxUint64
	return outsideGraph
}
// Commit returns the already loaded commit object held by the node; the error
// is always nil.
func (c *objectCommitNode) Commit() (*object.Commit, error) {
	return c.commit, nil
}

@ -0,0 +1,105 @@
package commitgraph
import (
"io"
"github.com/emirpasic/gods/trees/binaryheap"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
// commitNodeIteratorByCTime is the iterator state created by
// NewCommitNodeIterCTime.
type commitNodeIteratorByCTime struct {
	// heap holds the commit nodes still waiting to be returned.
	heap *binaryheap.Heap
	// seenExternal is the caller-supplied set of hashes to skip; the iterator
	// only reads it.
	seenExternal map[plumbing.Hash]bool
	// seen records the hashes that were ignored up front or already returned.
	seen map[plumbing.Hash]bool
}
// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit
// history starting at c and visiting its parents, ordered by committer time.
// This appears to be the closest order to `git log`.
//
// Each commit is returned at most once. Errors are returned if the history
// cannot be traversed (e.g. missing objects).
//
// seenExternal is a caller-owned set of hashes that the iterator skips; it is
// stored as passed in, not copied. ignore lists hashes that are marked as
// already seen before the walk starts, so they are skipped as well.
func NewCommitNodeIterCTime(
	c CommitNode,
	seenExternal map[plumbing.Hash]bool,
	ignore []plumbing.Hash,
) CommitNodeIter {
	visited := make(map[plumbing.Hash]bool)
	for _, hash := range ignore {
		visited[hash] = true
	}

	// byCommitTime ranks the node with the earlier commit time as the larger
	// element. It never reports two nodes as equal.
	byCommitTime := func(a, b interface{}) int {
		left, right := a.(CommitNode), b.(CommitNode)
		if left.CommitTime().Before(right.CommitTime()) {
			return 1
		}
		return -1
	}

	queue := binaryheap.NewWith(byCommitTime)
	queue.Push(c)

	return &commitNodeIteratorByCTime{
		heap:         queue,
		seenExternal: seenExternal,
		seen:         visited,
	}
}
// Next pops nodes from the heap until it finds one whose ID is in neither the
// seen set nor seenExternal, marks it as seen, queues its not-yet-seen
// parents and returns it.
//
// It returns io.EOF once the heap is empty. An error from resolving a parent
// node is returned immediately; the node that was being processed has
// already been marked as seen at that point.
func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) {
	for {
		item, ok := w.heap.Pop()
		if !ok {
			return nil, io.EOF
		}

		node := item.(CommitNode)
		id := node.ID()
		if w.seen[id] || w.seenExternal[id] {
			continue
		}
		w.seen[id] = true

		for idx, parentHash := range node.ParentHashes() {
			if !w.seen[parentHash] && !w.seenExternal[parentHash] {
				parent, err := node.ParentNode(idx)
				if err != nil {
					return nil, err
				}
				w.heap.Push(parent)
			}
		}

		return node, nil
	}
}
// ForEach calls cb for every node produced by Next until the iterator is
// exhausted.
//
// Iteration ends with a nil error when Next reports io.EOF or when cb returns
// storer.ErrStop. Any other error, whether from Next or from cb, stops the
// iteration and is returned.
func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error {
	for {
		node, err := w.Next()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		}

		switch cbErr := cb(node); {
		case cbErr == storer.ErrStop:
			return nil
		case cbErr != nil:
			return cbErr
		}
	}
}
// Close is a no-op; its body is empty.
func (w *commitNodeIteratorByCTime) Close() {
}

@ -0,0 +1,7 @@
// Package commitgraph provides an interface for efficient traversal over Git
// commit graph either through the regular object storage, or optionally with
// the index stored in commit-graph file (Git 2.18+).
//
// The API and functionality of this package are considered EXPERIMENTAL; they
// are neither stable nor production ready.
package commitgraph

@ -432,7 +432,9 @@ gopkg.in/src-d/go-git.v4/config
gopkg.in/src-d/go-git.v4/plumbing
gopkg.in/src-d/go-git.v4/plumbing/cache
gopkg.in/src-d/go-git.v4/plumbing/filemode
gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph
gopkg.in/src-d/go-git.v4/plumbing/object
gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph
gopkg.in/src-d/go-git.v4/storage/filesystem
gopkg.in/src-d/go-git.v4/internal/revision
gopkg.in/src-d/go-git.v4/plumbing/format/gitignore
@ -455,8 +457,8 @@ gopkg.in/src-d/go-git.v4/utils/merkletrie/index
gopkg.in/src-d/go-git.v4/utils/merkletrie/noder
gopkg.in/src-d/go-git.v4/internal/url
gopkg.in/src-d/go-git.v4/plumbing/format/config
gopkg.in/src-d/go-git.v4/plumbing/format/diff
gopkg.in/src-d/go-git.v4/utils/binary
gopkg.in/src-d/go-git.v4/plumbing/format/diff
gopkg.in/src-d/go-git.v4/plumbing/format/idxfile
gopkg.in/src-d/go-git.v4/plumbing/format/objfile
gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit

Loading…
Cancel
Save