From 270aab429ef025df9a0b9bf9e3982729ae8df449 Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 10 May 2021 02:27:03 +0100 Subject: [PATCH] On open repository open common cat file batch and batch-check (#15667) Use common git cat-file --batch and git cat-file --batch-check to significantly reduce calls to git. Signed-off-by: Andrew Thornton --- modules/context/repo.go | 8 +- modules/git/batch_reader.go | 55 ++++++++-- modules/git/blob_nogogit.go | 114 +++++++++++++++------ modules/git/blob_test.go | 5 +- modules/git/commit_info_nogogit.go | 14 ++- modules/git/last_commit_cache_nogogit.go | 3 +- modules/git/notes_nogogit.go | 9 +- modules/git/parse_nogogit.go | 48 +++++++++ modules/git/pipeline/lfs_nogogit.go | 9 +- modules/git/repo_base_nogogit.go | 53 +++++++++- modules/git/repo_blob_nogogit.go | 4 +- modules/git/repo_blob_test.go | 2 +- modules/git/repo_branch_nogogit.go | 20 +++- modules/git/repo_commit.go | 21 ---- modules/git/repo_commit_gogit.go | 21 ++++ modules/git/repo_commit_nogogit.go | 102 ++++++++++-------- modules/git/repo_language_stats_nogogit.go | 2 +- modules/git/repo_tag_nogogit.go | 6 +- modules/git/repo_tree_nogogit.go | 44 +++----- modules/git/tree_blob_nogogit.go | 1 + modules/git/tree_entry.go | 9 +- modules/git/tree_entry_nogogit.go | 10 +- modules/git/tree_nogogit.go | 48 +++++++++ modules/indexer/code/bleve.go | 2 +- modules/indexer/code/elastic_search.go | 2 +- routers/repo/download.go | 12 +++ 26 files changed, 463 insertions(+), 161 deletions(-) diff --git a/modules/context/repo.go b/modules/context/repo.go index c1f60a136..3e48b34b3 100644 --- a/modules/context/repo.go +++ b/modules/context/repo.go @@ -905,12 +905,18 @@ func (ctx *Context) IssueTemplatesFromDefaultBranch() []api.IssueTemplate { log.Debug("DataAsync: %v", err) continue } - defer r.Close() + closed := false + defer func() { + if !closed { + _ = r.Close() + } + }() data, err := ioutil.ReadAll(r) if err != nil { log.Debug("ReadAll: %v", err) continue } + _ = r.Close() 
var it api.IssueTemplate content, err := markdown.ExtractMetadata(string(data), &it) if err != nil { diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 1905067cb..3d3a6916f 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -13,9 +13,44 @@ import ( "strings" ) +// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function +type WriteCloserError interface { + io.WriteCloser + CloseWithError(err error) error +} + +// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function +func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()) { + batchStdinReader, batchStdinWriter := io.Pipe() + batchStdoutReader, batchStdoutWriter := io.Pipe() + cancel := func() { + _ = batchStdinReader.Close() + _ = batchStdinWriter.Close() + _ = batchStdoutReader.Close() + _ = batchStdoutWriter.Close() + } + + go func() { + stderr := strings.Builder{} + err := NewCommand("cat-file", "--batch-check").RunInDirFullPipeline(repoPath, batchStdoutWriter, &stderr, batchStdinReader) + if err != nil { + _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) + _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) + } else { + _ = batchStdoutWriter.Close() + _ = batchStdinReader.Close() + } + }() + + // For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check + batchReader := bufio.NewReader(batchStdoutReader) + + return batchStdinWriter, batchReader, cancel +} + // CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) { - // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 
+func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) { + // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout batchStdinReader, batchStdinWriter := io.Pipe() batchStdoutReader, batchStdoutWriter := io.Pipe() @@ -47,6 +82,7 @@ func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) { // ReadBatchLine reads the header line from cat-file --batch // We expect: // <sha> SP <type> SP <size> LF +// sha is a 40-byte value here, not a 20-byte one func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { sha, err = rd.ReadBytes(' ') if err != nil { @@ -54,19 +90,20 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er } sha = sha[:len(sha)-1] - typ, err = rd.ReadString(' ') + typ, err = rd.ReadString('\n') if err != nil { return } - typ = typ[:len(typ)-1] - var sizeStr string - sizeStr, err = rd.ReadString('\n') - if err != nil { + idx := strings.Index(typ, " ") + if idx < 0 { + err = ErrNotExist{ID: string(sha)} return } + sizeStr := typ[idx+1 : len(typ)-1] + typ = typ[:idx] - size, err = strconv.ParseInt(sizeStr[:len(sizeStr)-1], 10, 64) + size, err = strconv.ParseInt(sizeStr, 10, 64) return } @@ -128,7 +165,7 @@ headerLoop: } // Discard the rest of the commit - discard := size - n + discard := size - n + 1 for discard > math.MaxInt32 { _, err := rd.Discard(math.MaxInt32) if err != nil { diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index e917a3161..cdaeb636a 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -8,48 +8,54 @@ package git import ( "bufio" + "bytes" "io" - "strconv" - "strings" + "io/ioutil" + "math" ) // Blob represents a Git object. 
type Blob struct { ID SHA1 - gotSize bool - size int64 - repoPath string - name string + gotSize bool + size int64 + name string + repo *Repository } // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - stdoutReader, stdoutWriter := io.Pipe() + wr, rd, cancel := b.repo.CatFileBatch() - go func() { - stderr := &strings.Builder{} - err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(b.repoPath, stdoutWriter, stderr, strings.NewReader(b.ID.String()+"\n")) - if err != nil { - err = ConcatenateError(err, stderr.String()) - _ = stdoutWriter.CloseWithError(err) - } else { - _ = stdoutWriter.Close() - } - }() - - bufReader := bufio.NewReader(stdoutReader) - _, _, size, err := ReadBatchLine(bufReader) + _, err := wr.Write([]byte(b.ID.String() + "\n")) if err != nil { - stdoutReader.Close() + cancel() return nil, err } + _, _, size, err := ReadBatchLine(rd) + if err != nil { + cancel() + return nil, err + } + b.gotSize = true + b.size = size - return &LimitedReaderCloser{ - R: bufReader, - C: stdoutReader, - N: size, + if size < 4096 { + bs, err := ioutil.ReadAll(io.LimitReader(rd, size)) + if err != nil { + cancel() + return nil, err + } + _, err = rd.Discard(1) + return ioutil.NopCloser(bytes.NewReader(bs)), err + } + + return &blobReader{ + rd: rd, + n: size, + cancel: cancel, }, nil } @@ -59,18 +65,66 @@ func (b *Blob) Size() int64 { return b.size } - size, err := NewCommand("cat-file", "-s", b.ID.String()).RunInDir(b.repoPath) + wr, rd, cancel := b.repo.CatFileBatchCheck() + defer cancel() + _, err := wr.Write([]byte(b.ID.String() + "\n")) if err != nil { - log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repoPath, err) + log("error whilst reading size for %s in %s. 
Error: %v", b.ID.String(), b.repo.Path, err) return 0 } - - b.size, err = strconv.ParseInt(size[:len(size)-1], 10, 64) + _, _, b.size, err = ReadBatchLine(rd) if err != nil { - log("error whilst parsing size %s for %s in %s. Error: %v", size, b.ID.String(), b.repoPath, err) + log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } + b.gotSize = true return b.size } + +type blobReader struct { + rd *bufio.Reader + n int64 + cancel func() +} + +func (b *blobReader) Read(p []byte) (n int, err error) { + if b.n <= 0 { + return 0, io.EOF + } + if int64(len(p)) > b.n { + p = p[0:b.n] + } + n, err = b.rd.Read(p) + b.n -= int64(n) + return +} + +// Close implements io.Closer +func (b *blobReader) Close() error { + if b.n > 0 { + for b.n > math.MaxInt32 { + n, err := b.rd.Discard(math.MaxInt32) + b.n -= int64(n) + if err != nil { + b.cancel() + return err + } + b.n -= math.MaxInt32 + } + n, err := b.rd.Discard(int(b.n)) + b.n -= int64(n) + if err != nil { + b.cancel() + return err + } + } + if b.n == 0 { + _, err := b.rd.Discard(1) + b.n-- + b.cancel() + return err + } + return nil +} diff --git a/modules/git/blob_test.go b/modules/git/blob_test.go index d02251ed9..2ceda6c4e 100644 --- a/modules/git/blob_test.go +++ b/modules/git/blob_test.go @@ -29,9 +29,10 @@ func TestBlob_Data(t *testing.T) { r, err := testBlob.DataAsync() assert.NoError(t, err) require.NotNil(t, r) - defer r.Close() data, err := ioutil.ReadAll(r) + assert.NoError(t, r.Close()) + assert.NoError(t, err) assert.Equal(t, output, string(data)) } @@ -54,7 +55,7 @@ func Benchmark_Blob_Data(b *testing.B) { if err != nil { b.Fatal(err) } - defer r.Close() ioutil.ReadAll(r) + _ = r.Close() } } diff --git a/modules/git/commit_info_nogogit.go b/modules/git/commit_info_nogogit.go index 6dd7b11ba..b69d50dfc 100644 --- a/modules/git/commit_info_nogogit.go +++ b/modules/git/commit_info_nogogit.go @@ -102,7 +102,7 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath 
string, cache *LastCo } func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*Commit, []string, error) { - wr, rd, cancel := CatFileBatch(cache.repo.Path) + wr, rd, cancel := cache.repo.CatFileBatch() defer cancel() var unHitEntryPaths []string @@ -144,7 +144,7 @@ func GetLastCommitForPaths(commit *Commit, treePath string, paths []string) ([]* } }() - batchStdinWriter, batchReader, cancel := CatFileBatch(commit.repo.Path) + batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch() defer cancel() mapsize := 4096 @@ -237,6 +237,10 @@ revListLoop: // FIXME: is there any order to the way strings are emitted from cat-file? // if there is - then we could skip once we've passed all of our data } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } + break treeReadingLoop } @@ -281,6 +285,9 @@ revListLoop: return nil, err } } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } // if we haven't found a treeID for the target directory our search is over if len(treeID) == 0 { @@ -345,6 +352,9 @@ revListLoop: if err != nil { return nil, err } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } commitCommits[i] = c } diff --git a/modules/git/last_commit_cache_nogogit.go b/modules/git/last_commit_cache_nogogit.go index 0a1babb11..9808216a8 100644 --- a/modules/git/last_commit_cache_nogogit.go +++ b/modules/git/last_commit_cache_nogogit.go @@ -8,7 +8,6 @@ package git import ( "bufio" - "io" "path" ) @@ -36,7 +35,7 @@ func NewLastCommitCache(repoPath string, gitRepo *Repository, ttl func() int64, } // Get get the last commit information by commit id and entry path -func (c *LastCommitCache) Get(ref, entryPath string, wr *io.PipeWriter, rd *bufio.Reader) (interface{}, error) { +func (c *LastCommitCache) Get(ref, entryPath string, wr WriteCloserError, rd *bufio.Reader) (interface{}, error) { v := c.cache.Get(c.getCacheKey(c.repoPath, ref, entryPath)) 
if vs, ok := v.(string); ok { log("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs) diff --git a/modules/git/notes_nogogit.go b/modules/git/notes_nogogit.go index 1379e5085..d5d194b23 100644 --- a/modules/git/notes_nogogit.go +++ b/modules/git/notes_nogogit.go @@ -43,11 +43,18 @@ func GetNote(repo *Repository, commitID string, note *Note) error { if err != nil { return err } - defer dataRc.Close() + closed := false + defer func() { + if !closed { + _ = dataRc.Close() + } + }() d, err := ioutil.ReadAll(dataRc) if err != nil { return err } + _ = dataRc.Close() + closed = true note.Message = d treePath := "" diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go index e9e93f66f..b45b31f23 100644 --- a/modules/git/parse_nogogit.go +++ b/modules/git/parse_nogogit.go @@ -7,8 +7,10 @@ package git import ( + "bufio" "bytes" "fmt" + "io" "strconv" "strings" ) @@ -86,3 +88,49 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { } return entries, nil } + +func catBatchParseTreeEntries(ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) { + fnameBuf := make([]byte, 4096) + modeBuf := make([]byte, 40) + shaBuf := make([]byte, 40) + entries := make([]*TreeEntry, 0, 10) + +loop: + for sz > 0 { + mode, fname, sha, count, err := ParseTreeLine(rd, modeBuf, fnameBuf, shaBuf) + if err != nil { + if err == io.EOF { + break loop + } + return nil, err + } + sz -= int64(count) + entry := new(TreeEntry) + entry.ptree = ptree + + switch string(mode) { + case "100644": + entry.entryMode = EntryModeBlob + case "100755": + entry.entryMode = EntryModeExec + case "120000": + entry.entryMode = EntryModeSymlink + case "160000": + entry.entryMode = EntryModeCommit + case "40000": + entry.entryMode = EntryModeTree + default: + log("Unknown mode: %v", string(mode)) + return nil, fmt.Errorf("unknown mode: %v", string(mode)) + } + + entry.ID = MustID(sha) + entry.name = string(fname) + entries = append(entries, entry) + } + if _, err := 
rd.Discard(1); err != nil { + return entries, err + } + + return entries, nil +} diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 79f7528d3..6113bb301 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -43,8 +43,6 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { basePath := repo.Path - hashStr := hash.String() - // Use rev-list to provide us with all commits in order revListReader, revListWriter := io.Pipe() defer func() { @@ -64,7 +62,7 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel := git.CatFileBatch(repo.Path) + batchStdinWriter, batchReader, cancel := repo.CatFileBatch() defer cancel() // We'll use a scanner for the revList because it's simpler than a bufio.Reader @@ -132,8 +130,7 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { return nil, err } n += int64(count) - sha := git.To40ByteSHA(sha20byte) - if bytes.Equal(sha, []byte(hashStr)) { + if bytes.Equal(sha20byte, hash[:]) { result := LFSResult{ Name: curPath + string(fname), SHA: curCommit.ID.String(), @@ -143,7 +140,7 @@ func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) { } resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result } else if string(mode) == git.EntryModeTree.String() { - trees = append(trees, sha) + trees = append(trees, git.To40ByteSHA(sha20byte)) paths = append(paths, curPath+string(fname)+"/") } } diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index e05219a4e..c7d6019d7 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -8,6 +8,8 @@ package git import ( + "bufio" + "context" "errors" "path/filepath" ) @@ 
-19,6 +21,14 @@ type Repository struct { tagCache *ObjectCache gpgSettings *GPGSettings + + batchCancel context.CancelFunc + batchReader *bufio.Reader + batchWriter WriteCloserError + + checkCancel context.CancelFunc + checkReader *bufio.Reader + checkWriter WriteCloserError } // OpenRepository opens the repository at the given path. @@ -29,12 +39,51 @@ func OpenRepository(repoPath string) (*Repository, error) { } else if !isDir(repoPath) { return nil, errors.New("no such file or directory") } - return &Repository{ + + repo := &Repository{ Path: repoPath, tagCache: newObjectCache(), - }, nil + } + + repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(repoPath) + repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(repo.Path) + + return repo, nil +} + +// CatFileBatch obtains a CatFileBatch for this repository +func (repo *Repository) CatFileBatch() (WriteCloserError, *bufio.Reader, func()) { + if repo.batchCancel == nil || repo.batchReader.Buffered() > 0 { + log("Opening temporary cat file batch for: %s", repo.Path) + return CatFileBatch(repo.Path) + } + return repo.batchWriter, repo.batchReader, func() {} +} + +// CatFileBatchCheck obtains a CatFileBatchCheck for this repository +func (repo *Repository) CatFileBatchCheck() (WriteCloserError, *bufio.Reader, func()) { + if repo.checkCancel == nil || repo.checkReader.Buffered() > 0 { + log("Opening temporary cat file batch-check: %s", repo.Path) + return CatFileBatchCheck(repo.Path) + } + return repo.checkWriter, repo.checkReader, func() {} } // Close this repository, in particular close the underlying gogitStorage if this is not nil func (repo *Repository) Close() { + if repo == nil { + return + } + if repo.batchCancel != nil { + repo.batchCancel() + repo.batchReader = nil + repo.batchWriter = nil + repo.batchCancel = nil + } + if repo.checkCancel != nil { + repo.checkCancel() + repo.checkCancel = nil + repo.checkReader = nil + repo.checkWriter = nil + } } diff --git 
a/modules/git/repo_blob_nogogit.go b/modules/git/repo_blob_nogogit.go index 9959420df..afb08d29c 100644 --- a/modules/git/repo_blob_nogogit.go +++ b/modules/git/repo_blob_nogogit.go @@ -11,7 +11,7 @@ func (repo *Repository) getBlob(id SHA1) (*Blob, error) { return nil, ErrNotExist{id.String(), ""} } return &Blob{ - ID: id, - repoPath: repo.Path, + ID: id, + repo: repo, }, nil } diff --git a/modules/git/repo_blob_test.go b/modules/git/repo_blob_test.go index 52a124db2..ccf418b30 100644 --- a/modules/git/repo_blob_test.go +++ b/modules/git/repo_blob_test.go @@ -33,9 +33,9 @@ func TestRepository_GetBlob_Found(t *testing.T) { dataReader, err := blob.DataAsync() assert.NoError(t, err) - defer dataReader.Close() data, err := ioutil.ReadAll(dataReader) + assert.NoError(t, dataReader.Close()) assert.NoError(t, err) assert.Equal(t, testCase.Data, data) } diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 0628a5728..13ddcf06c 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -13,12 +13,30 @@ import ( "strings" ) +// IsReferenceExist returns true if given reference exists in the repository. +func (repo *Repository) IsReferenceExist(name string) bool { + if name == "" { + return false + } + + wr, rd, cancel := repo.CatFileBatchCheck() + defer cancel() + _, err := wr.Write([]byte(name + "\n")) + if err != nil { + log("Error writing to CatFileBatchCheck %v", err) + return false + } + _, _, _, err = ReadBatchLine(rd) + return err == nil +} + // IsBranchExist returns true if given branch exists in current repository. 
func (repo *Repository) IsBranchExist(name string) bool { if name == "" { return false } - return IsReferenceExist(repo.Path, BranchPrefix+name) + + return repo.IsReferenceExist(BranchPrefix + name) } // GetBranches returns branches from the repository, skipping skip initial branches and diff --git a/modules/git/repo_commit.go b/modules/git/repo_commit.go index 5e2db34fd..664a7445d 100644 --- a/modules/git/repo_commit.go +++ b/modules/git/repo_commit.go @@ -24,27 +24,6 @@ func (repo *Repository) GetTagCommitID(name string) (string, error) { return repo.GetRefCommitID(TagPrefix + name) } -// ConvertToSHA1 returns a Hash object from a potential ID string -func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) { - if len(commitID) == 40 { - sha1, err := NewIDFromString(commitID) - if err == nil { - return sha1, nil - } - } - - actualCommitID, err := NewCommand("rev-parse", "--verify", commitID).RunInDir(repo.Path) - if err != nil { - if strings.Contains(err.Error(), "unknown revision or path") || - strings.Contains(err.Error(), "fatal: Needed a single revision") { - return SHA1{}, ErrNotExist{commitID, ""} - } - return SHA1{}, err - } - - return NewIDFromString(actualCommitID) -} - // GetCommit returns commit object of by ID string. 
func (repo *Repository) GetCommit(commitID string) (*Commit, error) { id, err := repo.ConvertToSHA1(commitID) diff --git a/modules/git/repo_commit_gogit.go b/modules/git/repo_commit_gogit.go index 48b0cfe19..2f9b1c420 100644 --- a/modules/git/repo_commit_gogit.go +++ b/modules/git/repo_commit_gogit.go @@ -30,6 +30,27 @@ func (repo *Repository) GetRefCommitID(name string) (string, error) { return ref.Hash().String(), nil } +// ConvertToSHA1 returns a Hash object from a potential ID string +func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) { + if len(commitID) == 40 { + sha1, err := NewIDFromString(commitID) + if err == nil { + return sha1, nil + } + } + + actualCommitID, err := NewCommand("rev-parse", "--verify", commitID).RunInDir(repo.Path) + if err != nil { + if strings.Contains(err.Error(), "unknown revision or path") || + strings.Contains(err.Error(), "fatal: Needed a single revision") { + return SHA1{}, ErrNotExist{commitID, ""} + } + return SHA1{}, err + } + + return NewIDFromString(actualCommitID) +} + // IsCommitExist returns true if given commit exists in current repository. func (repo *Repository) IsCommitExist(name string) bool { hash := plumbing.NewHash(name) diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index df56b26b0..d00c1bfc6 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -11,8 +11,6 @@ import ( "errors" "io" "io/ioutil" - "os" - "path/filepath" "strings" ) @@ -35,27 +33,15 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). 
func (repo *Repository) GetRefCommitID(name string) (string, error) { - if strings.HasPrefix(name, "refs/") { - // We're gonna try just reading the ref file as this is likely to be quicker than other options - fileInfo, err := os.Lstat(filepath.Join(repo.Path, name)) - if err == nil && fileInfo.Mode().IsRegular() && fileInfo.Size() == 41 { - ref, err := ioutil.ReadFile(filepath.Join(repo.Path, name)) - - if err == nil && SHAPattern.Match(ref[:40]) && ref[40] == '\n' { - return string(ref[:40]), nil - } - } - } - - stdout, err := NewCommand("show-ref", "--verify", "--hash", name).RunInDir(repo.Path) - if err != nil { - if strings.Contains(err.Error(), "not a valid ref") { - return "", ErrNotExist{name, ""} - } - return "", err + wr, rd, cancel := repo.CatFileBatchCheck() + defer cancel() + _, _ = wr.Write([]byte(name + "\n")) + shaBs, _, _, err := ReadBatchLine(rd) + if IsErrNotExist(err) { + return "", ErrNotExist{name, ""} } - return strings.TrimSpace(stdout), nil + return string(shaBs), nil } // IsCommitExist returns true if given commit exists in current repository. 
@@ -65,31 +51,18 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id SHA1) (*Commit, error) { - stdoutReader, stdoutWriter := io.Pipe() - defer func() { - _ = stdoutReader.Close() - _ = stdoutWriter.Close() - }() - - go func() { - stderr := strings.Builder{} - err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, stdoutWriter, &stderr, strings.NewReader(id.String()+"\n")) - if err != nil { - _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) - } else { - _ = stdoutWriter.Close() - } - }() + wr, rd, cancel := repo.CatFileBatch() + defer cancel() - bufReader := bufio.NewReader(stdoutReader) + _, _ = wr.Write([]byte(id.String() + "\n")) - return repo.getCommitFromBatchReader(bufReader, id) + return repo.getCommitFromBatchReader(rd, id) } -func (repo *Repository) getCommitFromBatchReader(bufReader *bufio.Reader, id SHA1) (*Commit, error) { - _, typ, size, err := ReadBatchLine(bufReader) +func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id SHA1) (*Commit, error) { + _, typ, size, err := ReadBatchLine(rd) if err != nil { - if errors.Is(err, io.EOF) { + if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: id.String()} } return nil, err @@ -101,7 +74,11 @@ func (repo *Repository) getCommitFromBatchReader(bufReader *bufio.Reader, id SHA case "tag": // then we need to parse the tag // and load the commit - data, err := ioutil.ReadAll(io.LimitReader(bufReader, size)) + data, err := ioutil.ReadAll(io.LimitReader(rd, size)) + if err != nil { + return nil, err + } + _, err = rd.Discard(1) if err != nil { return nil, err } @@ -122,11 +99,50 @@ func (repo *Repository) getCommitFromBatchReader(bufReader *bufio.Reader, id SHA return commit, nil case "commit": - return CommitFromReader(repo, id, io.LimitReader(bufReader, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + if err != nil { + return nil, err + } + _, err = 
rd.Discard(1) + if err != nil { + return nil, err + } + + return commit, nil default: log("Unknown typ: %s", typ) + _, err = rd.Discard(int(size) + 1) + if err != nil { + return nil, err + } return nil, ErrNotExist{ ID: id.String(), } } } + +// ConvertToSHA1 returns a Hash object from a potential ID string +func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) { + if len(commitID) == 40 && SHAPattern.MatchString(commitID) { + sha1, err := NewIDFromString(commitID) + if err == nil { + return sha1, nil + } + } + + wr, rd, cancel := repo.CatFileBatchCheck() + defer cancel() + _, err := wr.Write([]byte(commitID + "\n")) + if err != nil { + return SHA1{}, err + } + sha, _, _, err := ReadBatchLine(rd) + if err != nil { + if IsErrNotExist(err) { + return SHA1{}, ErrNotExist{commitID, ""} + } + return SHA1{}, err + } + + return MustIDFromString(string(sha)), nil +} diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 3f197f8d7..0130d0a30 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -21,7 +21,7 @@ import ( func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel := CatFileBatch(repo.Path) + batchStdinWriter, batchReader, cancel := repo.CatFileBatch() defer cancel() writeID := func(id string) error { diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index b3fa5d6dc..a9e122aea 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -9,7 +9,11 @@ package git // IsTagExist returns true if given tag exists in the repository. 
func (repo *Repository) IsTagExist(name string) bool { - return IsReferenceExist(repo.Path, TagPrefix+name) + if name == "" { + return false + } + + return repo.IsReferenceExist(TagPrefix + name) } // GetTags returns all tags of the repository. diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 867c3fa5a..967f8aea3 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -7,33 +7,18 @@ package git import ( - "bufio" - "fmt" "io" "io/ioutil" - "strings" ) func (repo *Repository) getTree(id SHA1) (*Tree, error) { - stdoutReader, stdoutWriter := io.Pipe() - defer func() { - _ = stdoutReader.Close() - _ = stdoutWriter.Close() - }() + wr, rd, cancel := repo.CatFileBatch() + defer cancel() - go func() { - stderr := &strings.Builder{} - err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, stdoutWriter, stderr, strings.NewReader(id.String()+"\n")) - if err != nil { - _ = stdoutWriter.CloseWithError(ConcatenateError(err, stderr.String())) - } else { - _ = stdoutWriter.Close() - } - }() + _, _ = wr.Write([]byte(id.String() + "\n")) - bufReader := bufio.NewReader(stdoutReader) // ignore the SHA - _, typ, size, err := ReadBatchLine(bufReader) + _, typ, size, err := ReadBatchLine(rd) if err != nil { return nil, err } @@ -41,7 +26,7 @@ func (repo *Repository) getTree(id SHA1) (*Tree, error) { switch typ { case "tag": resolvedID := id - data, err := ioutil.ReadAll(io.LimitReader(bufReader, size)) + data, err := ioutil.ReadAll(io.LimitReader(rd, size)) if err != nil { return nil, err } @@ -54,24 +39,27 @@ func (repo *Repository) getTree(id SHA1) (*Tree, error) { return nil, err } commit.Tree.ResolvedID = resolvedID - log("tag.commit.Tree: %s %v", commit.Tree.ID.String(), commit.Tree.repo) return &commit.Tree, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(bufReader, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) if err != nil { - _ = 
stdoutReader.CloseWithError(err) + return nil, err + } + if _, err := rd.Discard(1); err != nil { return nil, err } commit.Tree.ResolvedID = commit.ID - log("commit.Tree: %s %v", commit.Tree.ID.String(), commit.Tree.repo) return &commit.Tree, nil case "tree": - stdoutReader.Close() tree := NewTree(repo, id) tree.ResolvedID = id + tree.entries, err = catBatchParseTreeEntries(tree, rd, size) + if err != nil { + return nil, err + } + tree.entriesParsed = true return tree, nil default: - _ = stdoutReader.CloseWithError(fmt.Errorf("unknown typ: %s", typ)) return nil, ErrNotExist{ ID: id.String(), } @@ -81,12 +69,12 @@ func (repo *Repository) getTree(id SHA1) (*Tree, error) { // GetTree find the tree object in the repository. func (repo *Repository) GetTree(idStr string) (*Tree, error) { if len(idStr) != 40 { - res, err := NewCommand("rev-parse", "--verify", idStr).RunInDir(repo.Path) + res, err := repo.GetRefCommitID(idStr) if err != nil { return nil, err } if len(res) > 0 { - idStr = res[:len(res)-1] + idStr = res } } id, err := NewIDFromString(idStr) diff --git a/modules/git/tree_blob_nogogit.go b/modules/git/tree_blob_nogogit.go index 6da0ccfe8..fdd8d79c8 100644 --- a/modules/git/tree_blob_nogogit.go +++ b/modules/git/tree_blob_nogogit.go @@ -15,6 +15,7 @@ import ( func (t *Tree) GetTreeEntryByPath(relpath string) (*TreeEntry, error) { if len(relpath) == 0 { return &TreeEntry{ + ptree: t, ID: t.ID, name: "", fullName: "", diff --git a/modules/git/tree_entry.go b/modules/git/tree_entry.go index 498767a63..3644d00f3 100644 --- a/modules/git/tree_entry.go +++ b/modules/git/tree_entry.go @@ -34,12 +34,19 @@ func (te *TreeEntry) FollowLink() (*TreeEntry, error) { if err != nil { return nil, err } - defer r.Close() + closed := false + defer func() { + if !closed { + _ = r.Close() + } + }() buf := make([]byte, te.Size()) _, err = io.ReadFull(r, buf) if err != nil { return nil, err } + _ = r.Close() + closed = true lnk := string(buf) t := te.ptree diff --git 
a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index fd60de36f..41356ceba 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -84,10 +84,10 @@ func (te *TreeEntry) IsExecutable() bool { // Blob returns the blob object the entry func (te *TreeEntry) Blob() *Blob { return &Blob{ - ID: te.ID, - repoPath: te.ptree.repo.Path, - name: te.Name(), - size: te.size, - gotSize: te.sized, + ID: te.ID, + name: te.Name(), + size: te.size, + gotSize: te.sized, + repo: te.ptree.repo, } } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 3ebdf1063..9661d8fae 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -7,6 +7,8 @@ package git import ( + "io" + "math" "strings" ) @@ -32,6 +34,52 @@ func (t *Tree) ListEntries() (Entries, error) { return t.entries, nil } + if t.repo != nil { + wr, rd, cancel := t.repo.CatFileBatch() + defer cancel() + + _, _ = wr.Write([]byte(t.ID.String() + "\n")) + _, typ, sz, err := ReadBatchLine(rd) + if err != nil { + return nil, err + } + if typ == "commit" { + treeID, err := ReadTreeID(rd, sz) + if err != nil && err != io.EOF { + return nil, err + } + _, _ = wr.Write([]byte(treeID + "\n")) + _, typ, sz, err = ReadBatchLine(rd) + if err != nil { + return nil, err + } + } + if typ == "tree" { + t.entries, err = catBatchParseTreeEntries(t, rd, sz) + if err != nil { + return nil, err + } + t.entriesParsed = true + return t.entries, nil + } + + // Not a tree just use ls-tree instead + for sz > math.MaxInt32 { + discarded, err := rd.Discard(math.MaxInt32) + sz -= int64(discarded) + if err != nil { + return nil, err + } + } + for sz > 0 { + discarded, err := rd.Discard(int(sz)) + sz -= int64(discarded) + if err != nil { + return nil, err + } + } + } + stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path) if err != nil { if strings.Contains(err.Error(), "fatal: Not a valid object name") || 
strings.Contains(err.Error(), "fatal: not a tree object") { diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 416adeea7..1d6aa51bc 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -176,7 +176,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { return indexer, created, err } -func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { +func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go index ebb7910fd..982b36e8d 100644 --- a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elastic_search.go @@ -175,7 +175,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) { return exists, nil } -func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { +func (b *ElasticSearchIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil diff --git a/routers/repo/download.go b/routers/repo/download.go index 1eedec8cb..dafa62d0d 100644 --- a/routers/repo/download.go +++ b/routers/repo/download.go @@ -100,7 +100,11 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error { if err != nil { return err } + closed := false defer 
func() { + if closed { + return + } if err = dataRc.Close(); err != nil { log.Error("ServeBlobOrLFS: Close: %v", err) } @@ -110,6 +114,10 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error { if pointer.IsValid() { meta, _ := ctx.Repo.Repository.GetLFSMetaObjectByOid(pointer.Oid) if meta == nil { + if err = dataRc.Close(); err != nil { + log.Error("ServeBlobOrLFS: Close: %v", err) + } + closed = true return ServeBlob(ctx, blob) } if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) { @@ -126,6 +134,10 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error { }() return ServeData(ctx, ctx.Repo.TreePath, meta.Size, lfsDataRc) } + if err = dataRc.Close(); err != nil { + log.Error("ServeBlobOrLFS: Close: %v", err) + } + closed = true return ServeBlob(ctx, blob) }