Use git attributes to determine generated and vendored status for language stats and diffs (#16773)

Replaces #16262
Replaces #16250
Replaces #14833

This PR first implements a `git check-attr` pipe reader - using `git check-attr --stdin -z --cached` - taking account of the change in the output format in git 1.8.5 and creates a helper function to read a tree into a temporary index file for that pipe reader.

It then wires this in to the language stats helper and into the git diff generation.

Files which are marked generated will be folded by default.

Fixes #14786
Fixes #12653
tokarchuk/v1.17
zeripath 3 years ago committed by GitHub
parent b83b4fbef9
commit 248b96d8a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 28
      modules/analyze/generated.go
  2. 285
      modules/git/repo_attribute.go
  3. 159
      modules/git/repo_attribute_test.go
  4. 39
      modules/git/repo_index.go
  5. 70
      modules/git/repo_language_stats_gogit.go
  6. 71
      modules/git/repo_language_stats_nogogit.go
  7. 2
      options/locale/locale_en-US.ini
  8. 78
      services/gitdiff/gitdiff.go
  9. 16
      templates/repo/diff/box.tmpl
  10. 3
      web_src/js/index.js

@ -0,0 +1,28 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package analyze
import (
"path/filepath"
"strings"
"github.com/go-enry/go-enry/v2/data"
)
// IsGenerated returns whether or not path is a generated path.
func IsGenerated(path string) bool {
ext := strings.ToLower(filepath.Ext(path))
if _, ok := data.GeneratedCodeExtensions[ext]; ok {
return true
}
for _, m := range data.GeneratedCodeNameMatchers {
if m(path) {
return true
}
}
return false
}

@ -6,7 +6,12 @@ package git
import ( import (
"bytes" "bytes"
"context"
"fmt" "fmt"
"io"
"os"
"strconv"
"strings"
) )
// CheckAttributeOpts represents the possible options to CheckAttribute // CheckAttributeOpts represents the possible options to CheckAttribute
@ -21,7 +26,7 @@ type CheckAttributeOpts struct {
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) { func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
err := LoadGitVersion() err := LoadGitVersion()
if err != nil { if err != nil {
return nil, fmt.Errorf("Git version missing: %v", err) return nil, fmt.Errorf("git version missing: %v", err)
} }
stdOut := new(bytes.Buffer) stdOut := new(bytes.Buffer)
@ -55,13 +60,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
cmd := NewCommand(cmdArgs...) cmd := NewCommand(cmdArgs...)
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil { if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String()) return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
} }
// FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 { if len(fields)%3 != 1 {
return nil, fmt.Errorf("Wrong number of fields in return from check-attr") return nil, fmt.Errorf("wrong number of fields in return from check-attr")
} }
var name2attribute2info = make(map[string]map[string]string) var name2attribute2info = make(map[string]map[string]string)
@ -80,3 +86,276 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return name2attribute2info, nil return name2attribute2info, nil
} }
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Attributes []string
Repo *Repository
IndexFile string
WorkTree string
stdinReader io.ReadCloser
stdinWriter *os.File
stdOut attributeWriter
cmd *Command
env []string
ctx context.Context
cancel context.CancelFunc
running chan struct{}
}
// Init initializes the cmd
func (c *CheckAttributeReader) Init(ctx context.Context) error {
c.running = make(chan struct{})
cmdArgs := []string{"check-attr", "--stdin", "-z"}
if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
cmdArgs = append(cmdArgs, "--cached")
c.env = []string{"GIT_INDEX_FILE=" + c.IndexFile}
}
if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
c.env = []string{"GIT_WORK_TREE=" + c.WorkTree}
}
if len(c.Attributes) > 0 {
cmdArgs = append(cmdArgs, c.Attributes...)
cmdArgs = append(cmdArgs, "--")
} else {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple)
c.stdOut = lw
c.stdOut.Close()
return fmt.Errorf("no provided Attributes to check")
}
c.ctx, c.cancel = context.WithCancel(ctx)
c.cmd = NewCommandContext(c.ctx, cmdArgs...)
var err error
c.stdinReader, c.stdinWriter, err = os.Pipe()
if err != nil {
return err
}
if CheckGitVersionAtLeast("1.8.5") == nil {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
c.stdOut = lw
} else {
lw := new(lineSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
c.stdOut = lw
}
return nil
}
// Run run cmd
func (c *CheckAttributeReader) Run() error {
stdErr := new(bytes.Buffer)
err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error {
close(c.running)
return nil
})
defer c.cancel()
_ = c.stdOut.Close()
if err != nil && c.ctx.Err() != nil && err.Error() != "signal: killed" {
return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
}
return nil
}
// CheckPath check attr for given path
func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) {
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
case <-c.running:
}
if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
defer c.cancel()
return nil, err
}
if err := c.stdinWriter.Sync(); err != nil {
defer c.cancel()
return nil, err
}
rs := make(map[string]string)
for range c.Attributes {
select {
case attr := <-c.stdOut.ReadAttribute():
rs[attr.Attribute] = attr.Value
case <-c.ctx.Done():
return nil, c.ctx.Err()
}
}
return rs, nil
}
// Close close pip after use
func (c *CheckAttributeReader) Close() error {
select {
case <-c.running:
default:
close(c.running)
}
defer c.cancel()
return c.stdinWriter.Close()
}
type attributeWriter interface {
io.WriteCloser
ReadAttribute() <-chan attributeTriple
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
working attributeTriple
pos int
}
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l, read := len(p), 0
nulIdx := bytes.IndexByte(p, '\x00')
for nulIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nulIdx]...)
switch wr.pos {
case 0:
wr.working = attributeTriple{
Filename: string(wr.tmp),
}
case 1:
wr.working.Attribute = string(wr.tmp)
case 2:
wr.working.Value = string(wr.tmp)
}
wr.tmp = wr.tmp[:0]
wr.pos++
if wr.pos > 2 {
wr.attributes <- wr.working
wr.pos = 0
}
read += nulIdx + 1
if l > read {
p = p[nulIdx+1:]
nulIdx = bytes.IndexByte(p, '\x00')
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return len(p), nil
}
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *nulSeparatedAttributeWriter) Close() error {
close(wr.attributes)
return nil
}
type lineSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
}
func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l := len(p)
nlIdx := bytes.IndexByte(p, '\n')
for nlIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nlIdx]...)
if len(wr.tmp) == 0 {
// This should not happen
if len(p) > nlIdx+1 {
wr.tmp = wr.tmp[:0]
p = p[nlIdx+1:]
nlIdx = bytes.IndexByte(p, '\n')
continue
} else {
return l, nil
}
}
working := attributeTriple{}
if wr.tmp[0] == '"' {
sb := new(strings.Builder)
remaining := string(wr.tmp[1:])
for len(remaining) > 0 {
rn, _, tail, err := strconv.UnquoteChar(remaining, '"')
if err != nil {
if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' {
working.Filename = sb.String()
wr.tmp = []byte(remaining[3:])
break
}
return l, fmt.Errorf("unexpected tail %s", string(remaining))
}
_, _ = sb.WriteRune(rn)
remaining = tail
}
} else {
idx := bytes.IndexByte(wr.tmp, ':')
if idx < 0 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Filename = string(wr.tmp[:idx])
if len(wr.tmp) < idx+2 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
wr.tmp = wr.tmp[idx+2:]
}
idx := bytes.IndexByte(wr.tmp, ':')
if idx < 0 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Attribute = string(wr.tmp[:idx])
if len(wr.tmp) < idx+2 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Value = string(wr.tmp[idx+2:])
wr.attributes <- working
wr.tmp = wr.tmp[:0]
if len(p) > nlIdx+1 {
p = p[nlIdx+1:]
nlIdx = bytes.IndexByte(p, '\n')
continue
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return l, nil
}
func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *lineSeparatedAttributeWriter) Close() error {
close(wr.attributes)
return nil
}

@ -0,0 +1,159 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
wr := &nulSeparatedAttributeWriter{
attributes: make(chan attributeTriple, 5),
}
testStr := ".gitignore\"\n\x00linguist-vendored\x00unspecified\x00"
n, err := wr.Write([]byte(testStr))
assert.Equal(t, n, len(testStr))
assert.NoError(t, err)
select {
case attr := <-wr.ReadAttribute():
assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, "linguist-vendored", attr.Attribute)
assert.Equal(t, "unspecified", attr.Value)
case <-time.After(100 * time.Millisecond):
assert.Fail(t, "took too long to read an attribute from the list")
}
// Write a second attribute again
n, err = wr.Write([]byte(testStr))
assert.Equal(t, n, len(testStr))
assert.NoError(t, err)
select {
case attr := <-wr.ReadAttribute():
assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, "linguist-vendored", attr.Attribute)
assert.Equal(t, "unspecified", attr.Value)
case <-time.After(100 * time.Millisecond):
assert.Fail(t, "took too long to read an attribute from the list")
}
//Write a partial attribute
_, err = wr.Write([]byte("incomplete-file"))
assert.NoError(t, err)
_, err = wr.Write([]byte("name\x00"))
assert.NoError(t, err)
select {
case <-wr.ReadAttribute():
assert.Fail(t, "There should not be an attribute ready to read")
case <-time.After(100 * time.Millisecond):
}
_, err = wr.Write([]byte("attribute\x00"))
assert.NoError(t, err)
select {
case <-wr.ReadAttribute():
assert.Fail(t, "There should not be an attribute ready to read")
case <-time.After(100 * time.Millisecond):
}
_, err = wr.Write([]byte("value\x00"))
assert.NoError(t, err)
attr := <-wr.ReadAttribute()
assert.Equal(t, "incomplete-filename", attr.Filename)
assert.Equal(t, "attribute", attr.Attribute)
assert.Equal(t, "value", attr.Value)
_, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00"))
assert.NoError(t, err)
attr = <-wr.ReadAttribute()
assert.NoError(t, err)
assert.EqualValues(t, attributeTriple{
Filename: "shouldbe.vendor",
Attribute: "linguist-vendored",
Value: "set",
}, attr)
attr = <-wr.ReadAttribute()
assert.NoError(t, err)
assert.EqualValues(t, attributeTriple{
Filename: "shouldbe.vendor",
Attribute: "linguist-generated",
Value: "unspecified",
}, attr)
attr = <-wr.ReadAttribute()
assert.NoError(t, err)
assert.EqualValues(t, attributeTriple{
Filename: "shouldbe.vendor",
Attribute: "linguist-language",
Value: "unspecified",
}, attr)
}
func Test_lineSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
wr := &lineSeparatedAttributeWriter{
attributes: make(chan attributeTriple, 5),
}
testStr := `".gitignore\"\n": linguist-vendored: unspecified
`
n, err := wr.Write([]byte(testStr))
assert.Equal(t, n, len(testStr))
assert.NoError(t, err)
select {
case attr := <-wr.ReadAttribute():
assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, "linguist-vendored", attr.Attribute)
assert.Equal(t, "unspecified", attr.Value)
case <-time.After(100 * time.Millisecond):
assert.Fail(t, "took too long to read an attribute from the list")
}
// Write a second attribute again
n, err = wr.Write([]byte(testStr))
assert.Equal(t, n, len(testStr))
assert.NoError(t, err)
select {
case attr := <-wr.ReadAttribute():
assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, "linguist-vendored", attr.Attribute)
assert.Equal(t, "unspecified", attr.Value)
case <-time.After(100 * time.Millisecond):
assert.Fail(t, "took too long to read an attribute from the list")
}
//Write a partial attribute
_, err = wr.Write([]byte("incomplete-file"))
assert.NoError(t, err)
_, err = wr.Write([]byte("name: "))
assert.NoError(t, err)
select {
case <-wr.ReadAttribute():
assert.Fail(t, "There should not be an attribute ready to read")
case <-time.After(100 * time.Millisecond):
}
_, err = wr.Write([]byte("attribute: "))
assert.NoError(t, err)
select {
case <-wr.ReadAttribute():
assert.Fail(t, "There should not be an attribute ready to read")
case <-time.After(100 * time.Millisecond):
}
_, err = wr.Write([]byte("value\n"))
assert.NoError(t, err)
attr := <-wr.ReadAttribute()
assert.Equal(t, "incomplete-filename", attr.Filename)
assert.Equal(t, "attribute", attr.Attribute)
assert.Equal(t, "value", attr.Value)
}

@ -6,11 +6,17 @@ package git
import ( import (
"bytes" "bytes"
"context"
"io/ioutil"
"os"
"strings" "strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
) )
// ReadTreeToIndex reads a treeish to the index // ReadTreeToIndex reads a treeish to the index
func (repo *Repository) ReadTreeToIndex(treeish string) error { func (repo *Repository) ReadTreeToIndex(treeish string, indexFilename ...string) error {
if len(treeish) != 40 { if len(treeish) != 40 {
res, err := NewCommand("rev-parse", "--verify", treeish).RunInDir(repo.Path) res, err := NewCommand("rev-parse", "--verify", treeish).RunInDir(repo.Path)
if err != nil { if err != nil {
@ -24,17 +30,42 @@ func (repo *Repository) ReadTreeToIndex(treeish string) error {
if err != nil { if err != nil {
return err return err
} }
return repo.readTreeToIndex(id) return repo.readTreeToIndex(id, indexFilename...)
} }
func (repo *Repository) readTreeToIndex(id SHA1) error { func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error {
_, err := NewCommand("read-tree", id.String()).RunInDir(repo.Path) var env []string
if len(indexFilename) > 0 {
env = append(os.Environ(), "GIT_INDEX_FILE="+indexFilename[0])
}
_, err := NewCommand("read-tree", id.String()).RunInDirWithEnv(repo.Path, env)
if err != nil { if err != nil {
return err return err
} }
return nil return nil
} }
// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
tmpIndex, err := ioutil.TempFile("", "index")
if err != nil {
return
}
filename = tmpIndex.Name()
cancel = func() {
err := util.Remove(filename)
if err != nil {
log.Error("failed to remove tmp index file: %v", err)
}
}
err = repo.ReadTreeToIndex(treeish, filename)
if err != nil {
defer cancel()
return "", func() {}, err
}
return
}
// EmptyIndex empties the index // EmptyIndex empties the index
func (repo *Repository) EmptyIndex() error { func (repo *Repository) EmptyIndex() error {
_, err := NewCommand("read-tree", "--empty").RunInDir(repo.Path) _, err := NewCommand("read-tree", "--empty").RunInDir(repo.Path)

@ -9,10 +9,12 @@ package git
import ( import (
"bytes" "bytes"
"context"
"io" "io"
"io/ioutil" "io/ioutil"
"code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"github.com/go-enry/go-enry/v2" "github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5"
@ -42,9 +44,73 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err return nil, err
} }
var checker *CheckAttributeReader
if CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err == nil {
defer deleteTemporaryFile()
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
Repo: repo,
IndexFile: indexFilename,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
}
}
sizes := make(map[string]int64) sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error { err = tree.Files().ForEach(func(f *object.File) error {
if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || if f.Size == 0 {
return nil
}
notVendored := false
notGenerated := false
if checker != nil {
attrs, err := checker.CheckPath(f.Name)
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
return nil
}
notVendored = vendored == "false"
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
return nil
}
notGenerated = generated == "false"
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) == 0 {
language = group
}
sizes[language] += f.Size
return nil
}
}
}
if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil return nil
} }
@ -54,7 +120,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if f.Size <= bigFileSize { if f.Size <= bigFileSize {
content, _ = readFile(f, fileSizeLimit) content, _ = readFile(f, fileSizeLimit)
} }
if enry.IsGenerated(f.Name, content) { if !notGenerated && enry.IsGenerated(f.Name, content) {
return nil return nil
} }

@ -10,6 +10,7 @@ package git
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"context"
"io" "io"
"math" "math"
@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err return nil, err
} }
var checker *CheckAttributeReader
if CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err == nil {
defer deleteTemporaryFile()
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
Repo: repo,
IndexFile: indexFilename,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
}
}
contentBuf := bytes.Buffer{} contentBuf := bytes.Buffer{}
var content []byte var content []byte
sizes := make(map[string]int64) sizes := make(map[string]int64)
for _, f := range entries { for _, f := range entries {
contentBuf.Reset() contentBuf.Reset()
content = contentBuf.Bytes() content = contentBuf.Bytes()
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
if f.Size() == 0 {
continue
}
notVendored := false
notGenerated := false
if checker != nil {
attrs, err := checker.CheckPath(f.Name())
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
continue
}
notVendored = vendored == "false"
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
continue
}
notGenerated = generated == "false"
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) == 0 {
language = group
}
sizes[language] += f.Size()
continue
}
}
}
if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
continue continue
} }
@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err return nil, err
} }
} }
if enry.IsGenerated(f.Name(), content) { if !notGenerated && enry.IsGenerated(f.Name(), content) {
continue continue
} }
// TODO: Use .gitattributes file for linguist overrides
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary? // FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - eg. do the all the detection tests using filename first before reading content. // - eg. do the all the detection tests using filename first before reading content.
language := analyze.GetCodeLanguage(f.Name(), content) language := analyze.GetCodeLanguage(f.Name(), content)

@ -2011,6 +2011,8 @@ diff.file_byte_size = Size
diff.file_suppressed = File diff suppressed because it is too large diff.file_suppressed = File diff suppressed because it is too large
diff.file_suppressed_line_too_long = File diff suppressed because one or more lines are too long diff.file_suppressed_line_too_long = File diff suppressed because one or more lines are too long
diff.too_many_files = Some files were not shown because too many files changed in this diff diff.too_many_files = Some files were not shown because too many files changed in this diff
diff.generated = generated
diff.vendored = vendored
diff.comment.placeholder = Leave a comment diff.comment.placeholder = Leave a comment
diff.comment.markdown_info = Styling with markdown is supported. diff.comment.markdown_info = Styling with markdown is supported.
diff.comment.add_single_comment = Add single comment diff.comment.add_single_comment = Add single comment

@ -23,6 +23,7 @@ import (
"time" "time"
"code.gitea.io/gitea/models" "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/highlight"
@ -30,6 +31,7 @@ import (
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/sergi/go-diff/diffmatchpatch" "github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset" stdcharset "golang.org/x/net/html/charset"
@ -593,6 +595,8 @@ type DiffFile struct {
IsIncomplete bool IsIncomplete bool
IsIncompleteLineTooLong bool IsIncompleteLineTooLong bool
IsProtected bool IsProtected bool
IsGenerated bool
IsVendored bool
} }
// GetType returns type of diff file. // GetType returns type of diff file.
@ -1268,7 +1272,81 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
if err != nil { if err != nil {
return nil, fmt.Errorf("ParsePatch: %v", err) return nil, fmt.Errorf("ParsePatch: %v", err)
} }
var checker *git.CheckAttributeReader
if git.CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID)
if err == nil {
defer deleteTemporaryFile()
workdir, err := ioutil.TempDir("", "empty-work-dir")
if err != nil {
log.Error("Unable to create temporary directory: %v", err)
return nil, err
}
defer func() {
_ = util.RemoveAll(workdir)
}()
checker = &git.CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated"},
Repo: gitRepo,
IndexFile: indexFilename,
WorkTree: workdir,
}
ctx, cancel := context.WithCancel(git.DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err)
} else {
go func() {
err = checker.Run()
if err != nil && err != ctx.Err() {
log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err)
}
cancel()
}()
}
defer func() {
cancel()
}()
}
}
for _, diffFile := range diff.Files { for _, diffFile := range diff.Files {
gotVendor := false
gotGenerated := false
if checker != nil {
attrs, err := checker.CheckPath(diffFile.Name)
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
diffFile.IsVendored = true
gotVendor = true
} else {
gotVendor = vendored == "false"
}
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
diffFile.IsGenerated = true
gotGenerated = true
} else {
gotGenerated = generated == "false"
}
}
} else {
log.Error("Unexpected error: %v", err)
}
}
if !gotVendor {
diffFile.IsVendored = analyze.IsVendor(diffFile.Name)
}
if !gotGenerated {
diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
}
tailSection := diffFile.GetTailSection(gitRepo, beforeCommitID, afterCommitID) tailSection := diffFile.GetTailSection(gitRepo, beforeCommitID, afterCommitID)
if tailSection != nil { if tailSection != nil {
diffFile.Sections = append(diffFile.Sections, tailSection) diffFile.Sections = append(diffFile.Sections, tailSection)

@ -49,11 +49,15 @@
{{$isImage := or (call $.IsBlobAnImage $blobBase) (call $.IsBlobAnImage $blobHead)}} {{$isImage := or (call $.IsBlobAnImage $blobBase) (call $.IsBlobAnImage $blobHead)}}
{{$isCsv := (call $.IsCsvFile $file)}} {{$isCsv := (call $.IsCsvFile $file)}}
{{$showFileViewToggle := or $isImage (and (not $file.IsIncomplete) $isCsv)}} {{$showFileViewToggle := or $isImage (and (not $file.IsIncomplete) $isCsv)}}
<div class="diff-file-box diff-box file-content {{TabSizeClass $.Editorconfig $file.Name}} mt-3" id="diff-{{.Index}}"> <div class="diff-file-box diff-box file-content {{TabSizeClass $.Editorconfig $file.Name}} mt-3" id="diff-{{.Index}}" {{if $file.IsGenerated}}data-folded="true"{{end}}>
<h4 class="diff-file-header sticky-2nd-row ui top attached normal header df ac sb"> <h4 class="diff-file-header sticky-2nd-row ui top attached normal header df ac sb">
<div class="df ac"> <div class="fold-file df ac">
<a role="button" class="fold-file muted mr-2"> <a role="button" class="chevron muted mr-2">
{{if $file.IsGenerated}}
{{svg "octicon-chevron-right" 18}}
{{else}}
{{svg "octicon-chevron-down" 18}} {{svg "octicon-chevron-down" 18}}
{{end}}
</a> </a>
<div class="bold df ac"> <div class="bold df ac">
{{if $file.IsBin}} {{if $file.IsBin}}
@ -65,6 +69,12 @@
{{end}} {{end}}
</div> </div>
<span class="file mono">{{if $file.IsRenamed}}{{$file.OldName}} &rarr; {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}</span> <span class="file mono">{{if $file.IsRenamed}}{{$file.OldName}} &rarr; {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}</span>
{{if $file.IsGenerated}}
<span class="ui label ml-3">{{$.i18n.Tr "repo.diff.generated"}}</span>
{{end}}
{{if $file.IsVendored}}
<span class="ui label ml-3">{{$.i18n.Tr "repo.diff.vendored"}}</span>
{{end}}
</div> </div>
<div class="diff-file-header-actions df ac"> <div class="diff-file-header-actions df ac">
{{if $showFileViewToggle}} {{if $showFileViewToggle}}

@ -2349,8 +2349,9 @@ function initCodeView() {
} }
$(document).on('click', '.fold-file', ({currentTarget}) => { $(document).on('click', '.fold-file', ({currentTarget}) => {
const box = currentTarget.closest('.file-content'); const box = currentTarget.closest('.file-content');
const chevron = currentTarget.querySelector('a.chevron');
const folded = box.dataset.folded !== 'true'; const folded = box.dataset.folded !== 'true';
currentTarget.innerHTML = svg(`octicon-chevron-${folded ? 'right' : 'down'}`, 18); chevron.innerHTML = svg(`octicon-chevron-${folded ? 'right' : 'down'}`, 18);
box.dataset.folded = String(folded); box.dataset.folded = String(folded);
}); });
$(document).on('click', '.blob-excerpt', async ({currentTarget}) => { $(document).on('click', '.blob-excerpt', async ({currentTarget}) => {

Loading…
Cancel
Save