From c08e42c47ef2a32b3b7ee422c73d6929c93b199e Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 8 Oct 2022 01:20:53 +0800 Subject: [PATCH] Refactor parseTreeEntries, speed up tree list (#21368) Close #20315 (fix the panic when parsing invalid input), Speed up #20231 (use ls-tree without size field) Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size) --- modules/git/parse_nogogit.go | 78 +++++++++++----------- modules/git/parse_nogogit_test.go | 48 +++++++++++-- modules/git/repo_language_stats_nogogit.go | 2 +- modules/git/tree_gogit.go | 9 ++- modules/git/tree_nogogit.go | 19 +++++- routers/web/repo/treelist.go | 4 +- services/repository/files/tree.go | 2 +- 7 files changed, 109 insertions(+), 53 deletions(-) diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go index c8f0f994f..fb5b63def 100644 --- a/modules/git/parse_nogogit.go +++ b/modules/git/parse_nogogit.go @@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { return parseTreeEntries(data, nil) } +var sepSpace = []byte{' '} + func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { - entries := make([]*TreeEntry, 0, 10) + var err error + entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1) for pos := 0; pos < len(data); { - // expect line to be of the form " \t" + // expect line to be of the form: + // \t + // \t + posEnd := bytes.IndexByte(data[pos:], '\n') + if posEnd == -1 { + posEnd = len(data) + } else { + posEnd += pos + } + line := data[pos:posEnd] + posTab := bytes.IndexByte(line, '\t') + if posTab == -1 { + return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line) + } + entry := new(TreeEntry) entry.ptree = ptree - if pos+6 > len(data) { - return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) + + entryAttrs := line[:posTab] + entryName := line[posTab+1:] + + entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace) + _ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type + entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace) + if len(entryAttrs) > 0 { + entrySize := entryAttrs // the last field is the space-padded-size + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64) + entry.sized = true } - switch string(data[pos : pos+6]) { + + switch string(entryMode) { case "100644": entry.entryMode = EntryModeBlob - pos += 12 // skip over "100644 blob " case "100755": entry.entryMode = EntryModeExec - pos += 12 // skip over "100755 blob " case "120000": entry.entryMode = EntryModeSymlink - pos += 12 // skip over "120000 blob " case "160000": entry.entryMode = EntryModeCommit - pos += 14 // skip over "160000 object " case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons entry.entryMode = EntryModeTree - pos += 12 // skip over "040000 tree " default: - return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6])) + return nil, fmt.Errorf("unknown type: %v", string(entryMode)) } - if pos+40 > len(data) { - return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) - } - id, err := NewIDFromString(string(data[pos : pos+40])) + entry.ID, err = NewIDFromString(string(entryObjectID)) if err != nil { - return nil, fmt.Errorf("Invalid ls-tree output: %v", err) - } - entry.ID = id - pos += 41 // skip over sha and trailing space - - end := pos + bytes.IndexByte(data[pos:], '\t') - if end < pos { - return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) - } - entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) - entry.sized = true - - pos = end + 1 - - end = pos + bytes.IndexByte(data[pos:], '\n') - if end < pos { - return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) + return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err) } - // In case entry name is surrounded by double quotes(it happens only in git-shell). - if data[pos] == '"' { - entry.name, err = strconv.Unquote(string(data[pos:end])) + if len(entryName) > 0 && entryName[0] == '"' { + entry.name, err = strconv.Unquote(string(entryName)) if err != nil { - return nil, fmt.Errorf("Invalid ls-tree output: %v", err) + return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err) } } else { - entry.name = string(data[pos:end]) + entry.name = string(entryName) } - pos = end + 1 + pos = posEnd + 1 entries = append(entries, entry) } return entries, nil diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go index 483f96e9a..cecd3960d 100644 --- a/modules/git/parse_nogogit_test.go +++ b/modules/git/parse_nogogit_test.go @@ -12,7 +12,7 @@ import ( "github.com/stretchr/testify/assert" ) -func TestParseTreeEntries(t *testing.T) { +func TestParseTreeEntriesLong(t *testing.T) { testCases := []struct { Input string Expected []*TreeEntry @@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) { assert.NoError(t, err) assert.Len(t, entries, len(testCase.Expected)) for i, entry := range entries { - assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) - assert.EqualValues(t, testCase.Expected[i].name, entry.name) - assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) - assert.EqualValues(t, testCase.Expected[i].sized, entry.sized) - assert.EqualValues(t, testCase.Expected[i].size, entry.size) + assert.EqualValues(t, testCase.Expected[i], entry) } } } + +func TestParseTreeEntriesShort(t *testing.T) { + testCases := []struct { + Input string + Expected []*TreeEntry + }{ + { + Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md +040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets +`, + Expected: []*TreeEntry{ + { + ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), + name: "README.md", + entryMode: EntryModeBlob, + }, + { + ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), + name: "assets", + entryMode: EntryModeTree, + }, + }, + }, + } + for _, testCase := range testCases { + entries, err := ParseTreeEntries([]byte(testCase.Input)) + assert.NoError(t, err) + assert.Len(t, entries, len(testCase.Expected)) + for i, entry := range entries { + assert.EqualValues(t, testCase.Expected[i], entry) + } + } +} + +func TestParseTreeEntriesInvalid(t *testing.T) { + // there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315 + entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af")) + assert.Error(t, err) + assert.Len(t, entries, 0) +} diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index d237924f9..7388ef403 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err tree := commit.Tree - entries, err := tree.ListEntriesRecursive() + entries, err := tree.ListEntriesRecursiveWithSize() if err != nil { return nil, err } diff --git a/modules/git/tree_gogit.go b/modules/git/tree_gogit.go index 54f8e140f..480c5e44d 100644 --- a/modules/git/tree_gogit.go +++ b/modules/git/tree_gogit.go @@ -57,8 +57,8 @@ func (t *Tree) ListEntries() (Entries, error) { return entries, nil } -// ListEntriesRecursive returns all entries of current tree recursively including all subtrees -func (t *Tree) ListEntriesRecursive() (Entries, error) { +// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees +func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) { if t.gogitTree == nil { err := t.loadTreeObject() if err != nil { @@ -92,3 +92,8 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { return entries, nil } + +// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version +func (t *Tree) ListEntriesRecursiveFast() (Entries, error) { + return t.ListEntriesRecursiveWithSize() +} diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 7defb064a..d61d19e4c 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -99,13 +99,16 @@ func (t *Tree) ListEntries() (Entries, error) { return t.entries, err } -// ListEntriesRecursive returns all entries of current tree recursively including all subtrees -func (t *Tree) ListEntriesRecursive() (Entries, error) { +// listEntriesRecursive returns all entries of current tree recursively including all subtrees +// extraArgs could be "-l" to get the size, which is slower +func (t *Tree) listEntriesRecursive(extraArgs ...string) (Entries, error) { if t.entriesRecursiveParsed { return t.entriesRecursive, nil } - stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path}) + args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...) + args = append(args, t.ID.String()) + stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path}) if runErr != nil { return nil, runErr } @@ -118,3 +121,13 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { return t.entriesRecursive, err } + +// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size +func (t *Tree) ListEntriesRecursiveFast() (Entries, error) { + return t.listEntriesRecursive() +} + +// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size +func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) { + return t.listEntriesRecursive("--long") +} diff --git a/routers/web/repo/treelist.go b/routers/web/repo/treelist.go index 35ac0d507..80f43a0c4 100644 --- a/routers/web/repo/treelist.go +++ b/routers/web/repo/treelist.go @@ -22,9 +22,9 @@ func TreeList(ctx *context.Context) { return } - entries, err := tree.ListEntriesRecursive() + entries, err := tree.ListEntriesRecursiveFast() if err != nil { - ctx.ServerError("ListEntriesRecursive", err) + ctx.ServerError("ListEntriesRecursiveFast", err) return } entries.CustomSort(base.NaturalSortLess) diff --git a/services/repository/files/tree.go b/services/repository/files/tree.go index caad73288..59e569097 100644 --- a/services/repository/files/tree.go +++ b/services/repository/files/tree.go @@ -29,7 +29,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA) var entries git.Entries if recursive { - entries, err = gitTree.ListEntriesRecursive() + entries, err = gitTree.ListEntriesRecursiveWithSize() } else { entries, err = gitTree.ListEntries() }