On open repository open common cat file batch and batch-check (#15667)

Use common git cat-file --batch and git cat-file --batch-check to
significantly reduce calls to git.
    
Signed-off-by: Andrew Thornton <art27@cantab.net>
tokarchuk/v1.17
zeripath 4 years ago committed by GitHub
parent 038e1db4df
commit 270aab429e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      modules/context/repo.go
  2. 55
      modules/git/batch_reader.go
  3. 114
      modules/git/blob_nogogit.go
  4. 5
      modules/git/blob_test.go
  5. 14
      modules/git/commit_info_nogogit.go
  6. 3
      modules/git/last_commit_cache_nogogit.go
  7. 9
      modules/git/notes_nogogit.go
  8. 48
      modules/git/parse_nogogit.go
  9. 9
      modules/git/pipeline/lfs_nogogit.go
  10. 53
      modules/git/repo_base_nogogit.go
  11. 4
      modules/git/repo_blob_nogogit.go
  12. 2
      modules/git/repo_blob_test.go
  13. 20
      modules/git/repo_branch_nogogit.go
  14. 21
      modules/git/repo_commit.go
  15. 21
      modules/git/repo_commit_gogit.go
  16. 102
      modules/git/repo_commit_nogogit.go
  17. 2
      modules/git/repo_language_stats_nogogit.go
  18. 6
      modules/git/repo_tag_nogogit.go
  19. 44
      modules/git/repo_tree_nogogit.go
  20. 1
      modules/git/tree_blob_nogogit.go
  21. 9
      modules/git/tree_entry.go
  22. 10
      modules/git/tree_entry_nogogit.go
  23. 48
      modules/git/tree_nogogit.go
  24. 2
      modules/indexer/code/bleve.go
  25. 2
      modules/indexer/code/elastic_search.go
  26. 12
      routers/repo/download.go

@ -905,12 +905,18 @@ func (ctx *Context) IssueTemplatesFromDefaultBranch() []api.IssueTemplate {
log.Debug("DataAsync: %v", err) log.Debug("DataAsync: %v", err)
continue continue
} }
defer r.Close() closed := false
defer func() {
if !closed {
_ = r.Close()
}
}()
data, err := ioutil.ReadAll(r) data, err := ioutil.ReadAll(r)
if err != nil { if err != nil {
log.Debug("ReadAll: %v", err) log.Debug("ReadAll: %v", err)
continue continue
} }
_ = r.Close()
var it api.IssueTemplate var it api.IssueTemplate
content, err := markdown.ExtractMetadata(string(data), &it) content, err := markdown.ExtractMetadata(string(data), &it)
if err != nil { if err != nil {

@ -13,9 +13,44 @@ import (
"strings" "strings"
) )
// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function.
// It is the type through which the stdin side of a cat-file pipe is handed to
// callers; the value returned for it in this file is the write half of an io.Pipe.
type WriteCloserError interface {
	io.WriteCloser
	// CloseWithError closes the writer and records err as the reason for the
	// close (for an *io.PipeWriter, the read half then reports err).
	CloseWithError(err error) error
}
// CatFileBatchCheck starts `git cat-file --batch-check` in repoPath.
//
// It returns, in order: the writer on which object names are sent to the
// command (one per line), a buffered reader over the command's output, and a
// cancel function that closes both ends of both pipes.
func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
	stdinRd, stdinWr := io.Pipe()
	stdoutRd, stdoutWr := io.Pipe()

	// Run the command in the background; when it finishes, close the pipe
	// ends it was using so that callers blocked on them are released.
	go func() {
		var stderr strings.Builder
		runErr := NewCommand("cat-file", "--batch-check").RunInDirFullPipeline(repoPath, stdoutWr, &stderr, stdinRd)
		if runErr == nil {
			_ = stdoutWr.Close()
			_ = stdinRd.Close()
			return
		}
		// On failure pass the error, together with whatever was written to
		// stderr, to whoever is on the other side of each pipe.
		_ = stdoutWr.CloseWithError(ConcatenateError(runErr, stderr.String()))
		_ = stdinRd.CloseWithError(ConcatenateError(runErr, stderr.String()))
	}()

	cancel := func() {
		_ = stdinRd.Close()
		_ = stdinWr.Close()
		_ = stdoutRd.Close()
		_ = stdoutWr.Close()
	}

	// For simplicity's sake the output is exposed through a buffered reader.
	return stdinWr, bufio.NewReader(stdoutRd), cancel
}
// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function // CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) { func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout // so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe() batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe() batchStdoutReader, batchStdoutWriter := io.Pipe()
@ -47,6 +82,7 @@ func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
// ReadBatchLine reads the header line from cat-file --batch // ReadBatchLine reads the header line from cat-file --batch
// We expect: // We expect:
// <sha> SP <type> SP <size> LF // <sha> SP <type> SP <size> LF
// sha is the 40-byte hex encoding here, not the 20-byte binary form
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
sha, err = rd.ReadBytes(' ') sha, err = rd.ReadBytes(' ')
if err != nil { if err != nil {
@ -54,19 +90,20 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er
} }
sha = sha[:len(sha)-1] sha = sha[:len(sha)-1]
typ, err = rd.ReadString(' ') typ, err = rd.ReadString('\n')
if err != nil { if err != nil {
return return
} }
typ = typ[:len(typ)-1]
var sizeStr string idx := strings.Index(typ, " ")
sizeStr, err = rd.ReadString('\n') if idx < 0 {
if err != nil { err = ErrNotExist{ID: string(sha)}
return return
} }
sizeStr := typ[idx+1 : len(typ)-1]
typ = typ[:idx]
size, err = strconv.ParseInt(sizeStr[:len(sizeStr)-1], 10, 64) size, err = strconv.ParseInt(sizeStr, 10, 64)
return return
} }
@ -128,7 +165,7 @@ headerLoop:
} }
// Discard the rest of the commit // Discard the rest of the commit
discard := size - n discard := size - n + 1
for discard > math.MaxInt32 { for discard > math.MaxInt32 {
_, err := rd.Discard(math.MaxInt32) _, err := rd.Discard(math.MaxInt32)
if err != nil { if err != nil {

@ -8,48 +8,54 @@ package git
import ( import (
"bufio" "bufio"
"bytes"
"io" "io"
"strconv" "io/ioutil"
"strings" "math"
) )
// Blob represents a Git object. // Blob represents a Git object.
type Blob struct { type Blob struct {
ID SHA1 ID SHA1
gotSize bool gotSize bool
size int64 size int64
repoPath string name string
name string repo *Repository
} }
// DataAsync gets a ReadCloser for the contents of a blob without reading it all. // DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output. // Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) { func (b *Blob) DataAsync() (io.ReadCloser, error) {
stdoutReader, stdoutWriter := io.Pipe() wr, rd, cancel := b.repo.CatFileBatch()
go func() { _, err := wr.Write([]byte(b.ID.String() + "\n"))
stderr := &strings.Builder{}
err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(b.repoPath, stdoutWriter, stderr, strings.NewReader(b.ID.String()+"\n"))
if err != nil {
err = ConcatenateError(err, stderr.String())
_ = stdoutWriter.CloseWithError(err)
} else {
_ = stdoutWriter.Close()
}
}()
bufReader := bufio.NewReader(stdoutReader)
_, _, size, err := ReadBatchLine(bufReader)
if err != nil { if err != nil {
stdoutReader.Close() cancel()
return nil, err return nil, err
} }
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
return nil, err
}
b.gotSize = true
b.size = size
return &LimitedReaderCloser{ if size < 4096 {
R: bufReader, bs, err := ioutil.ReadAll(io.LimitReader(rd, size))
C: stdoutReader, if err != nil {
N: size, cancel()
return nil, err
}
_, err = rd.Discard(1)
return ioutil.NopCloser(bytes.NewReader(bs)), err
}
return &blobReader{
rd: rd,
n: size,
cancel: cancel,
}, nil }, nil
} }
@ -59,18 +65,66 @@ func (b *Blob) Size() int64 {
return b.size return b.size
} }
size, err := NewCommand("cat-file", "-s", b.ID.String()).RunInDir(b.repoPath) wr, rd, cancel := b.repo.CatFileBatchCheck()
defer cancel()
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil { if err != nil {
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repoPath, err) log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0 return 0
} }
_, _, b.size, err = ReadBatchLine(rd)
b.size, err = strconv.ParseInt(size[:len(size)-1], 10, 64)
if err != nil { if err != nil {
log("error whilst parsing size %s for %s in %s. Error: %v", size, b.ID.String(), b.repoPath, err) log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0 return 0
} }
b.gotSize = true b.gotSize = true
return b.size return b.size
} }
// blobReader exposes the content of one object from a cat-file --batch
// response as an io.ReadCloser.
type blobReader struct {
	// rd is the batch output, positioned inside the object's content.
	rd *bufio.Reader
	// n is the number of content bytes not yet consumed. Close leaves it at -1
	// once the single byte that follows the content has been dropped as well.
	n int64
	// cancel is invoked by Close when it is done with rd, or has failed to
	// drain it.
	cancel func()
}

// Read implements io.Reader. It never reads past the end of the object's
// content and reports io.EOF once all of it has been consumed.
func (b *blobReader) Read(p []byte) (n int, err error) {
	if b.n <= 0 {
		return 0, io.EOF
	}
	if int64(len(p)) > b.n {
		p = p[:b.n]
	}
	n, err = b.rd.Read(p)
	b.n -= int64(n)
	return n, err
}

// Close implements io.Closer.
//
// It discards whatever part of the content has not been read, then the one
// byte that follows the content, and finally calls cancel. If discarding
// fails, cancel is called and the error is returned. Closing an already
// closed reader does nothing and returns nil.
func (b *blobReader) Close() error {
	// Discard takes an int, so drain in chunks of at most math.MaxInt32 bytes.
	// b.n is reduced by exactly the number of bytes Discard reports, so the
	// count stays correct for objects larger than 2 GiB.
	for b.n > 0 {
		chunk := math.MaxInt32
		if b.n < int64(chunk) {
			chunk = int(b.n)
		}
		discarded, err := b.rd.Discard(chunk)
		b.n -= int64(discarded)
		if err != nil {
			b.cancel()
			return err
		}
	}

	if b.n == 0 {
		// Drop the byte that follows the content and mark the reader closed.
		_, err := b.rd.Discard(1)
		b.n--
		b.cancel()
		return err
	}
	return nil
}

@ -29,9 +29,10 @@ func TestBlob_Data(t *testing.T) {
r, err := testBlob.DataAsync() r, err := testBlob.DataAsync()
assert.NoError(t, err) assert.NoError(t, err)
require.NotNil(t, r) require.NotNil(t, r)
defer r.Close()
data, err := ioutil.ReadAll(r) data, err := ioutil.ReadAll(r)
assert.NoError(t, r.Close())
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, output, string(data)) assert.Equal(t, output, string(data))
} }
@ -54,7 +55,7 @@ func Benchmark_Blob_Data(b *testing.B) {
if err != nil { if err != nil {
b.Fatal(err) b.Fatal(err)
} }
defer r.Close()
ioutil.ReadAll(r) ioutil.ReadAll(r)
_ = r.Close()
} }
} }

@ -102,7 +102,7 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache *LastCo
} }
func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*Commit, []string, error) { func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*Commit, []string, error) {
wr, rd, cancel := CatFileBatch(cache.repo.Path) wr, rd, cancel := cache.repo.CatFileBatch()
defer cancel() defer cancel()
var unHitEntryPaths []string var unHitEntryPaths []string
@ -144,7 +144,7 @@ func GetLastCommitForPaths(commit *Commit, treePath string, paths []string) ([]*
} }
}() }()
batchStdinWriter, batchReader, cancel := CatFileBatch(commit.repo.Path) batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch()
defer cancel() defer cancel()
mapsize := 4096 mapsize := 4096
@ -237,6 +237,10 @@ revListLoop:
// FIXME: is there any order to the way strings are emitted from cat-file? // FIXME: is there any order to the way strings are emitted from cat-file?
// if there is - then we could skip once we've passed all of our data // if there is - then we could skip once we've passed all of our data
} }
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
break treeReadingLoop break treeReadingLoop
} }
@ -281,6 +285,9 @@ revListLoop:
return nil, err return nil, err
} }
} }
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
// if we haven't found a treeID for the target directory our search is over // if we haven't found a treeID for the target directory our search is over
if len(treeID) == 0 { if len(treeID) == 0 {
@ -345,6 +352,9 @@ revListLoop:
if err != nil { if err != nil {
return nil, err return nil, err
} }
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
commitCommits[i] = c commitCommits[i] = c
} }

@ -8,7 +8,6 @@ package git
import ( import (
"bufio" "bufio"
"io"
"path" "path"
) )
@ -36,7 +35,7 @@ func NewLastCommitCache(repoPath string, gitRepo *Repository, ttl func() int64,
} }
// Get get the last commit information by commit id and entry path // Get get the last commit information by commit id and entry path
func (c *LastCommitCache) Get(ref, entryPath string, wr *io.PipeWriter, rd *bufio.Reader) (interface{}, error) { func (c *LastCommitCache) Get(ref, entryPath string, wr WriteCloserError, rd *bufio.Reader) (interface{}, error) {
v := c.cache.Get(c.getCacheKey(c.repoPath, ref, entryPath)) v := c.cache.Get(c.getCacheKey(c.repoPath, ref, entryPath))
if vs, ok := v.(string); ok { if vs, ok := v.(string); ok {
log("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs) log("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs)

@ -43,11 +43,18 @@ func GetNote(repo *Repository, commitID string, note *Note) error {
if err != nil { if err != nil {
return err return err
} }
defer dataRc.Close() closed := false
defer func() {
if !closed {
_ = dataRc.Close()
}
}()
d, err := ioutil.ReadAll(dataRc) d, err := ioutil.ReadAll(dataRc)
if err != nil { if err != nil {
return err return err
} }
_ = dataRc.Close()
closed = true
note.Message = d note.Message = d
treePath := "" treePath := ""

@ -7,8 +7,10 @@
package git package git
import ( import (
"bufio"
"bytes" "bytes"
"fmt" "fmt"
"io"
"strconv" "strconv"
"strings" "strings"
) )
@ -86,3 +88,49 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
} }
return entries, nil return entries, nil
} }
// catBatchParseTreeEntries reads the body of a tree object from rd and turns
// each record into a TreeEntry whose parent is ptree.
//
// sz is the number of body bytes expected; records are parsed until that many
// bytes have been accounted for or the reader reports io.EOF. Afterwards the
// single byte that follows the body is discarded. An entry with a mode other
// than the five recognised ones aborts parsing with an error.
func catBatchParseTreeEntries(ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) {
	// Scratch space passed to ParseTreeLine for every record.
	var (
		nameScratch = make([]byte, 4096)
		modeScratch = make([]byte, 40)
		shaScratch  = make([]byte, 40)
	)
	result := make([]*TreeEntry, 0, 10)

	for remaining := sz; remaining > 0; {
		mode, fname, sha, consumed, err := ParseTreeLine(rd, modeScratch, nameScratch, shaScratch)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		remaining -= int64(consumed)

		item := &TreeEntry{ptree: ptree}
		switch modeStr := string(mode); modeStr {
		case "100644":
			item.entryMode = EntryModeBlob
		case "100755":
			item.entryMode = EntryModeExec
		case "120000":
			item.entryMode = EntryModeSymlink
		case "160000":
			item.entryMode = EntryModeCommit
		case "40000":
			item.entryMode = EntryModeTree
		default:
			log("Unknown mode: %v", modeStr)
			return nil, fmt.Errorf("unknown mode: %v", modeStr)
		}
		item.ID = MustID(sha)
		item.name = string(fname)

		result = append(result, item)
	}

	// Step over the byte that follows the object body.
	_, err := rd.Discard(1)
	if err != nil {
		return result, err
	}
	return result, nil
}

@ -43,8 +43,6 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) {
basePath := repo.Path basePath := repo.Path
hashStr := hash.String()
// Use rev-list to provide us with all commits in order // Use rev-list to provide us with all commits in order
revListReader, revListWriter := io.Pipe() revListReader, revListWriter := io.Pipe()
defer func() { defer func() {
@ -64,7 +62,7 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) {
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout // so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel := git.CatFileBatch(repo.Path) batchStdinWriter, batchReader, cancel := repo.CatFileBatch()
defer cancel() defer cancel()
// We'll use a scanner for the revList because it's simpler than a bufio.Reader // We'll use a scanner for the revList because it's simpler than a bufio.Reader
@ -132,8 +130,7 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) {
return nil, err return nil, err
} }
n += int64(count) n += int64(count)
sha := git.To40ByteSHA(sha20byte) if bytes.Equal(sha20byte, hash[:]) {
if bytes.Equal(sha, []byte(hashStr)) {
result := LFSResult{ result := LFSResult{
Name: curPath + string(fname), Name: curPath + string(fname),
SHA: curCommit.ID.String(), SHA: curCommit.ID.String(),
@ -143,7 +140,7 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) {
} }
resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result
} else if string(mode) == git.EntryModeTree.String() { } else if string(mode) == git.EntryModeTree.String() {
trees = append(trees, sha) trees = append(trees, git.To40ByteSHA(sha20byte))
paths = append(paths, curPath+string(fname)+"/") paths = append(paths, curPath+string(fname)+"/")
} }
} }

@ -8,6 +8,8 @@
package git package git
import ( import (
"bufio"
"context"
"errors" "errors"
"path/filepath" "path/filepath"
) )
@ -19,6 +21,14 @@ type Repository struct {
tagCache *ObjectCache tagCache *ObjectCache
gpgSettings *GPGSettings gpgSettings *GPGSettings
batchCancel context.CancelFunc
batchReader *bufio.Reader
batchWriter WriteCloserError
checkCancel context.CancelFunc
checkReader *bufio.Reader
checkWriter WriteCloserError
} }
// OpenRepository opens the repository at the given path. // OpenRepository opens the repository at the given path.
@ -29,12 +39,51 @@ func OpenRepository(repoPath string) (*Repository, error) {
} else if !isDir(repoPath) { } else if !isDir(repoPath) {
return nil, errors.New("no such file or directory") return nil, errors.New("no such file or directory")
} }
return &Repository{
repo := &Repository{
Path: repoPath, Path: repoPath,
tagCache: newObjectCache(), tagCache: newObjectCache(),
}, nil }
repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(repoPath)
repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(repo.Path)
return repo, nil
}
// CatFileBatch returns a writer, reader and cancel function for talking to
// `git cat-file --batch` on this repository.
//
// The writer and reader stored on the repository are handed out, with a no-op
// cancel, when they exist and the reader holds no buffered bytes. Otherwise a
// temporary process is started via the package-level CatFileBatch, and its
// cancel function must be called by the caller.
func (repo *Repository) CatFileBatch() (WriteCloserError, *bufio.Reader, func()) {
	// Buffered data on the stored reader presumably means an earlier response
	// has not been fully consumed yet, so that reader is not reused.
	sharedUsable := repo.batchCancel != nil && repo.batchReader.Buffered() <= 0
	if sharedUsable {
		noop := func() {}
		return repo.batchWriter, repo.batchReader, noop
	}

	log("Opening temporary cat file batch for: %s", repo.Path)
	return CatFileBatch(repo.Path)
}
// CatFileBatchCheck obtains a CatFileBatchCheck for this repository
func (repo *Repository) CatFileBatchCheck() (WriteCloserError, *bufio.Reader, func()) {
if repo.checkCancel == nil || repo.checkReader.Buffered() > 0 {
log("Opening temporary cat file batch-check: %s", repo.Path)
return CatFileBatchCheck(repo.Path)
}
return repo.checkWriter, repo.checkReader, func() {}
} }
// Close this repository, in particular close the underlying gogitStorage if this is not nil // Close this repository, in particular close the underlying gogitStorage if this is not nil
func (repo *Repository) Close() { func (repo *Repository) Close() {
if repo == nil {
return
}
if repo.batchCancel != nil {
repo.batchCancel()
repo.batchReader = nil
repo.batchWriter = nil
repo.batchCancel = nil
}
if repo.checkCancel != nil {
repo.checkCancel()
repo.checkCancel = nil
repo.checkReader = nil
repo.checkWriter = nil
}
} }

@ -11,7 +11,7 @@ func (repo *Repository) getBlob(id SHA1) (*Blob, error) {
return nil, ErrNotExist{id.String(), ""} return nil, ErrNotExist{id.String(), ""}
} }
return &Blob{ return &Blob{
ID: id, ID: id,
repoPath: repo.Path, repo: repo,
}, nil }, nil
} }

@ -33,9 +33,9 @@ func TestRepository_GetBlob_Found(t *testing.T) {
dataReader, err := blob.DataAsync() dataReader, err := blob.DataAsync()
assert.NoError(t, err) assert.NoError(t, err)
defer dataReader.Close()
data, err := ioutil.ReadAll(dataReader) data, err := ioutil.ReadAll(dataReader)
assert.NoError(t, dataReader.Close())
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, testCase.Data, data) assert.Equal(t, testCase.Data, data)
} }

@ -13,12 +13,30 @@ import (
"strings" "strings"
) )
// IsReferenceExist returns true if given reference exists in the repository.
//
// The name is sent to cat-file --batch-check and the reference is taken to
// exist when a well-formed answer line comes back. An empty name, a name
// containing a line feed, a failed write or any error while reading the answer
// all yield false.
func (repo *Repository) IsReferenceExist(name string) bool {
	// Requests are one per line: a name with an embedded line feed would be
	// read as several requests, and the extra answers would stay unread on the
	// reader for whoever uses it next. No reference can have such a name.
	if name == "" || strings.Contains(name, "\n") {
		return false
	}

	wr, rd, cancel := repo.CatFileBatchCheck()
	defer cancel()

	if _, err := wr.Write([]byte(name + "\n")); err != nil {
		log("Error writing to CatFileBatchCheck %v", err)
		return false
	}

	// Only success or failure matters; the sha, type and size are ignored.
	_, _, _, err := ReadBatchLine(rd)
	return err == nil
}
// IsBranchExist returns true if given branch exists in current repository. // IsBranchExist returns true if given branch exists in current repository.
func (repo *Repository) IsBranchExist(name string) bool { func (repo *Repository) IsBranchExist(name string) bool {
if name == "" { if name == "" {
return false return false
} }
return IsReferenceExist(repo.Path, BranchPrefix+name)
return repo.IsReferenceExist(BranchPrefix + name)
} }
// GetBranches returns branches from the repository, skipping skip initial branches and // GetBranches returns branches from the repository, skipping skip initial branches and

@ -24,27 +24,6 @@ func (repo *Repository) GetTagCommitID(name string) (string, error) {
return repo.GetRefCommitID(TagPrefix + name) return repo.GetRefCommitID(TagPrefix + name)
} }
// ConvertToSHA1 returns a Hash object from a potential ID string
func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) {
if len(commitID) == 40 {
sha1, err := NewIDFromString(commitID)
if err == nil {
return sha1, nil
}
}
actualCommitID, err := NewCommand("rev-parse", "--verify", commitID).RunInDir(repo.Path)
if err != nil {
if strings.Contains(err.Error(), "unknown revision or path") ||
strings.Contains(err.Error(), "fatal: Needed a single revision") {
return SHA1{}, ErrNotExist{commitID, ""}
}
return SHA1{}, err
}
return NewIDFromString(actualCommitID)
}
// GetCommit returns commit object of by ID string. // GetCommit returns commit object of by ID string.
func (repo *Repository) GetCommit(commitID string) (*Commit, error) { func (repo *Repository) GetCommit(commitID string) (*Commit, error) {
id, err := repo.ConvertToSHA1(commitID) id, err := repo.ConvertToSHA1(commitID)

@ -30,6 +30,27 @@ func (repo *Repository) GetRefCommitID(name string) (string, error) {
return ref.Hash().String(), nil return ref.Hash().String(), nil
} }
// ConvertToSHA1 turns a revision string into a SHA1.
//
// A 40-character input that parses as an ID is returned directly. Anything
// else is resolved with `git rev-parse --verify`; when git reports that the
// revision is unknown or not a single revision, ErrNotExist is returned, and
// any other failure is passed through unchanged.
func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) {
	if len(commitID) == 40 {
		if id, parseErr := NewIDFromString(commitID); parseErr == nil {
			return id, nil
		}
	}

	resolved, err := NewCommand("rev-parse", "--verify", commitID).RunInDir(repo.Path)
	if err == nil {
		return NewIDFromString(resolved)
	}

	// Map the two "no such revision" messages onto ErrNotExist.
	msg := err.Error()
	for _, marker := range []string{"unknown revision or path", "fatal: Needed a single revision"} {
		if strings.Contains(msg, marker) {
			return SHA1{}, ErrNotExist{commitID, ""}
		}
	}
	return SHA1{}, err
}
// IsCommitExist returns true if given commit exists in current repository. // IsCommitExist returns true if given commit exists in current repository.
func (repo *Repository) IsCommitExist(name string) bool { func (repo *Repository) IsCommitExist(name string) bool {
hash := plumbing.NewHash(name) hash := plumbing.NewHash(name)

@ -11,8 +11,6 @@ import (
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"os"
"path/filepath"
"strings" "strings"
) )
@ -35,27 +33,15 @@ func (repo *Repository) ResolveReference(name string) (string, error) {
// GetRefCommitID returns the last commit ID string of given reference (branch or tag). // GetRefCommitID returns the last commit ID string of given reference (branch or tag).
func (repo *Repository) GetRefCommitID(name string) (string, error) { func (repo *Repository) GetRefCommitID(name string) (string, error) {
if strings.HasPrefix(name, "refs/") { wr, rd, cancel := repo.CatFileBatchCheck()
// We're gonna try just reading the ref file as this is likely to be quicker than other options defer cancel()
fileInfo, err := os.Lstat(filepath.Join(repo.Path, name)) _, _ = wr.Write([]byte(name + "\n"))
if err == nil && fileInfo.Mode().IsRegular() && fileInfo.Size() == 41 { shaBs, _, _, err := ReadBatchLine(rd)
ref, err := ioutil.ReadFile(filepath.Join(repo.Path, name)) if IsErrNotExist(err) {
return "", ErrNotExist{name, ""}
if err == nil && SHAPattern.Match(ref[:40]) && ref[40] == '\n' {
return string(ref[:40]), nil
}
}
}
stdout, err := NewCommand("show-ref", "--verify", "--hash", name).RunInDir(repo.Path)
if err != nil {
if strings.Contains(err.Error(), "not a valid ref") {
return "", ErrNotExist{name, ""}
}
return "", err
} }
return strings.TrimSpace(stdout), nil return string(shaBs), nil
} }
// IsCommitExist returns true if given commit exists in current repository. // IsCommitExist returns true if given commit exists in current repository.
@ -65,31 +51,18 @@ func (repo *Repository) IsCommitExist(name string) bool {
} }
func (repo *Repository) getCommit(id SHA1) (*Commit, error) { func (repo *Repository) getCommit(id SHA1) (*Commit, error) {
stdoutReader, stdoutWriter := io.Pipe() wr, rd, cancel := repo.CatFileBatch()
defer func() { defer cancel()
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
go func() {
stderr := strings.Builder{}
err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, stdoutWriter, &stderr, strings.NewReader(id.String()+"\n"))
if err != nil {
_ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
} else {
_ = stdoutWriter.Close()
}
}()
bufReader := bufio.NewReader(stdoutReader) _, _ = wr.Write([]byte(id.String() + "\n"))
return repo.getCommitFromBatchReader(bufReader, id) return repo.getCommitFromBatchReader(rd, id)
} }
func (repo *Repository) getCommitFromBatchReader(bufReader *bufio.Reader, id SHA1) (*Commit, error) { func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id SHA1) (*Commit, error) {
_, typ, size, err := ReadBatchLine(bufReader) _, typ, size, err := ReadBatchLine(rd)
if err != nil { if err != nil {
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) || IsErrNotExist(err) {
return nil, ErrNotExist{ID: id.String()} return nil, ErrNotExist{ID: id.String()}
} }
return nil, err return nil, err
@ -101,7 +74,11 @@ func (repo *Repository) getCommitFromBatchReader(bufReader *bufio.Reader, id SHA
case "tag": case "tag":
// then we need to parse the tag // then we need to parse the tag
// and load the commit // and load the commit
data, err := ioutil.ReadAll(io.LimitReader(bufReader, size)) data, err := ioutil.ReadAll(io.LimitReader(rd, size))
if err != nil {
return nil, err
}
_, err = rd.Discard(1)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -122,11 +99,50 @@ func (repo *Repository) getCommitFromBatchReader(bufReader *bufio.Reader, id SHA
return commit, nil return commit, nil
case "commit": case "commit":
return CommitFromReader(repo, id, io.LimitReader(bufReader, size)) commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size))
if err != nil {
return nil, err
}
_, err = rd.Discard(1)
if err != nil {
return nil, err
}
return commit, nil
default: default:
log("Unknown typ: %s", typ) log("Unknown typ: %s", typ)
_, err = rd.Discard(int(size) + 1)
if err != nil {
return nil, err
}
return nil, ErrNotExist{ return nil, ErrNotExist{
ID: id.String(), ID: id.String(),
} }
} }
} }
// ConvertToSHA1 returns a Hash object from a potential ID string.
//
// A 40-character input matching SHAPattern that parses as an ID is returned
// directly. Anything else is sent to cat-file --batch-check and the sha from
// the answer line is parsed. ErrNotExist is returned when the revision cannot
// be resolved; write, read and parse failures are returned as they are.
func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) {
	if len(commitID) == 40 && SHAPattern.MatchString(commitID) {
		sha1, err := NewIDFromString(commitID)
		if err == nil {
			return sha1, nil
		}
	}

	// Requests are one per line: an embedded line feed would be read as
	// several requests and leave the extra answers unread on the reader.
	// Nothing resolvable can contain one, so report it as not existing.
	if strings.Contains(commitID, "\n") {
		return SHA1{}, ErrNotExist{commitID, ""}
	}

	wr, rd, cancel := repo.CatFileBatchCheck()
	defer cancel()

	if _, err := wr.Write([]byte(commitID + "\n")); err != nil {
		return SHA1{}, err
	}

	sha, _, _, err := ReadBatchLine(rd)
	if err != nil {
		if IsErrNotExist(err) {
			return SHA1{}, ErrNotExist{commitID, ""}
		}
		return SHA1{}, err
	}

	// NewIDFromString is used rather than MustIDFromString, which has no error
	// result, so a malformed sha in the answer is reported to the caller.
	return NewIDFromString(string(sha))
}

@ -21,7 +21,7 @@ import (
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
// so let's create a batch stdin and stdout // so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel := CatFileBatch(repo.Path) batchStdinWriter, batchReader, cancel := repo.CatFileBatch()
defer cancel() defer cancel()
writeID := func(id string) error { writeID := func(id string) error {

@ -9,7 +9,11 @@ package git
// IsTagExist returns true if given tag exists in the repository. // IsTagExist returns true if given tag exists in the repository.
func (repo *Repository) IsTagExist(name string) bool { func (repo *Repository) IsTagExist(name string) bool {
return IsReferenceExist(repo.Path, TagPrefix+name) if name == "" {
return false
}
return repo.IsReferenceExist(TagPrefix + name)
} }
// GetTags returns all tags of the repository. // GetTags returns all tags of the repository.

@ -7,33 +7,18 @@
package git package git
import ( import (
"bufio"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"strings"
) )
func (repo *Repository) getTree(id SHA1) (*Tree, error) { func (repo *Repository) getTree(id SHA1) (*Tree, error) {
stdoutReader, stdoutWriter := io.Pipe() wr, rd, cancel := repo.CatFileBatch()
defer func() { defer cancel()
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
go func() { _, _ = wr.Write([]byte(id.String() + "\n"))
stderr := &strings.Builder{}
err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, stdoutWriter, stderr, strings.NewReader(id.String()+"\n"))
if err != nil {
_ = stdoutWriter.CloseWithError(ConcatenateError(err, stderr.String()))
} else {
_ = stdoutWriter.Close()
}
}()
bufReader := bufio.NewReader(stdoutReader)
// ignore the SHA // ignore the SHA
_, typ, size, err := ReadBatchLine(bufReader) _, typ, size, err := ReadBatchLine(rd)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -41,7 +26,7 @@ func (repo *Repository) getTree(id SHA1) (*Tree, error) {
switch typ { switch typ {
case "tag": case "tag":
resolvedID := id resolvedID := id
data, err := ioutil.ReadAll(io.LimitReader(bufReader, size)) data, err := ioutil.ReadAll(io.LimitReader(rd, size))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -54,24 +39,27 @@ func (repo *Repository) getTree(id SHA1) (*Tree, error) {
return nil, err return nil, err
} }
commit.Tree.ResolvedID = resolvedID commit.Tree.ResolvedID = resolvedID
log("tag.commit.Tree: %s %v", commit.Tree.ID.String(), commit.Tree.repo)
return &commit.Tree, nil return &commit.Tree, nil
case "commit": case "commit":
commit, err := CommitFromReader(repo, id, io.LimitReader(bufReader, size)) commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size))
if err != nil { if err != nil {
_ = stdoutReader.CloseWithError(err) return nil, err
}
if _, err := rd.Discard(1); err != nil {
return nil, err return nil, err
} }
commit.Tree.ResolvedID = commit.ID commit.Tree.ResolvedID = commit.ID
log("commit.Tree: %s %v", commit.Tree.ID.String(), commit.Tree.repo)
return &commit.Tree, nil return &commit.Tree, nil
case "tree": case "tree":
stdoutReader.Close()
tree := NewTree(repo, id) tree := NewTree(repo, id)
tree.ResolvedID = id tree.ResolvedID = id
tree.entries, err = catBatchParseTreeEntries(tree, rd, size)
if err != nil {
return nil, err
}
tree.entriesParsed = true
return tree, nil return tree, nil
default: default:
_ = stdoutReader.CloseWithError(fmt.Errorf("unknown typ: %s", typ))
return nil, ErrNotExist{ return nil, ErrNotExist{
ID: id.String(), ID: id.String(),
} }
@ -81,12 +69,12 @@ func (repo *Repository) getTree(id SHA1) (*Tree, error) {
// GetTree find the tree object in the repository. // GetTree find the tree object in the repository.
func (repo *Repository) GetTree(idStr string) (*Tree, error) { func (repo *Repository) GetTree(idStr string) (*Tree, error) {
if len(idStr) != 40 { if len(idStr) != 40 {
res, err := NewCommand("rev-parse", "--verify", idStr).RunInDir(repo.Path) res, err := repo.GetRefCommitID(idStr)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if len(res) > 0 { if len(res) > 0 {
idStr = res[:len(res)-1] idStr = res
} }
} }
id, err := NewIDFromString(idStr) id, err := NewIDFromString(idStr)

@ -15,6 +15,7 @@ import (
func (t *Tree) GetTreeEntryByPath(relpath string) (*TreeEntry, error) { func (t *Tree) GetTreeEntryByPath(relpath string) (*TreeEntry, error) {
if len(relpath) == 0 { if len(relpath) == 0 {
return &TreeEntry{ return &TreeEntry{
ptree: t,
ID: t.ID, ID: t.ID,
name: "", name: "",
fullName: "", fullName: "",

@ -34,12 +34,19 @@ func (te *TreeEntry) FollowLink() (*TreeEntry, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer r.Close() closed := false
defer func() {
if !closed {
_ = r.Close()
}
}()
buf := make([]byte, te.Size()) buf := make([]byte, te.Size())
_, err = io.ReadFull(r, buf) _, err = io.ReadFull(r, buf)
if err != nil { if err != nil {
return nil, err return nil, err
} }
_ = r.Close()
closed = true
lnk := string(buf) lnk := string(buf)
t := te.ptree t := te.ptree

@ -84,10 +84,10 @@ func (te *TreeEntry) IsExecutable() bool {
// Blob returns the blob object the entry // Blob returns the blob object the entry
func (te *TreeEntry) Blob() *Blob { func (te *TreeEntry) Blob() *Blob {
return &Blob{ return &Blob{
ID: te.ID, ID: te.ID,
repoPath: te.ptree.repo.Path, name: te.Name(),
name: te.Name(), size: te.size,
size: te.size, gotSize: te.sized,
gotSize: te.sized, repo: te.ptree.repo,
} }
} }

@ -7,6 +7,8 @@
package git package git
import ( import (
"io"
"math"
"strings" "strings"
) )
@ -32,6 +34,52 @@ func (t *Tree) ListEntries() (Entries, error) {
return t.entries, nil return t.entries, nil
} }
if t.repo != nil {
wr, rd, cancel := t.repo.CatFileBatch()
defer cancel()
_, _ = wr.Write([]byte(t.ID.String() + "\n"))
_, typ, sz, err := ReadBatchLine(rd)
if err != nil {
return nil, err
}
if typ == "commit" {
treeID, err := ReadTreeID(rd, sz)
if err != nil && err != io.EOF {
return nil, err
}
_, _ = wr.Write([]byte(treeID + "\n"))
_, typ, sz, err = ReadBatchLine(rd)
if err != nil {
return nil, err
}
}
if typ == "tree" {
t.entries, err = catBatchParseTreeEntries(t, rd, sz)
if err != nil {
return nil, err
}
t.entriesParsed = true
return t.entries, nil
}
// Not a tree just use ls-tree instead
for sz > math.MaxInt32 {
discarded, err := rd.Discard(math.MaxInt32)
sz -= int64(discarded)
if err != nil {
return nil, err
}
}
for sz > 0 {
discarded, err := rd.Discard(int(sz))
sz -= int64(discarded)
if err != nil {
return nil, err
}
}
}
stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path) stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") { if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {

@ -176,7 +176,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {
return indexer, created, err return indexer, created, err
} }
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
// Ignore vendored files in code search // Ignore vendored files in code search
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
return nil return nil

@ -175,7 +175,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
return exists, nil return exists, nil
} }
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { func (b *ElasticSearchIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
// Ignore vendored files in code search // Ignore vendored files in code search
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
return nil, nil return nil, nil

@ -100,7 +100,11 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error {
if err != nil { if err != nil {
return err return err
} }
closed := false
defer func() { defer func() {
if closed {
return
}
if err = dataRc.Close(); err != nil { if err = dataRc.Close(); err != nil {
log.Error("ServeBlobOrLFS: Close: %v", err) log.Error("ServeBlobOrLFS: Close: %v", err)
} }
@ -110,6 +114,10 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error {
if pointer.IsValid() { if pointer.IsValid() {
meta, _ := ctx.Repo.Repository.GetLFSMetaObjectByOid(pointer.Oid) meta, _ := ctx.Repo.Repository.GetLFSMetaObjectByOid(pointer.Oid)
if meta == nil { if meta == nil {
if err = dataRc.Close(); err != nil {
log.Error("ServeBlobOrLFS: Close: %v", err)
}
closed = true
return ServeBlob(ctx, blob) return ServeBlob(ctx, blob)
} }
if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) { if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) {
@ -126,6 +134,10 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error {
}() }()
return ServeData(ctx, ctx.Repo.TreePath, meta.Size, lfsDataRc) return ServeData(ctx, ctx.Repo.TreePath, meta.Size, lfsDataRc)
} }
if err = dataRc.Close(); err != nil {
log.Error("ServeBlobOrLFS: Close: %v", err)
}
closed = true
return ServeBlob(ctx, blob) return ServeBlob(ctx, blob)
} }

Loading…
Cancel
Save