From 0c6137617fbf41ee6cb315f96a2acc2dd91203e8 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 29 Mar 2021 22:44:28 +0200 Subject: [PATCH] Add Tabular Diff for CSV files (#14661) Implements request #14320 The rendering of CSV files does match the diff style. * Moved CSV logic into base package. * Added method to create a tabular diff. * Added CSV compare context. * Added CSV diff template. * Use new table style in CSV markup. * Added file size limit for CSV rendering. * Display CSV parser errors in diff. * Lazy read single file. * Lazy read rows for full diff. * Added unit tests for various CSV changes. --- custom/conf/app.example.ini | 4 + .../doc/advanced/config-cheat-sheet.en-us.md | 4 + modules/csv/csv.go | 93 +++++ modules/csv/csv_test.go | 40 ++ modules/markup/csv/csv.go | 103 ++--- modules/markup/csv/csv_test.go | 12 +- modules/markup/sanitizer.go | 4 + modules/setting/setting.go | 9 + options/locale/locale_en-US.ini | 5 + routers/repo/commit.go | 3 +- routers/repo/compare.go | 86 +++- routers/repo/pull.go | 6 +- services/gitdiff/csv.go | 379 ++++++++++++++++++ services/gitdiff/csv_test.go | 119 ++++++ templates/repo/diff/box.tmpl | 43 +- templates/repo/diff/csv_diff.tmpl | 46 +++ templates/repo/view_file.tmpl | 2 +- web_src/js/index.js | 13 + web_src/less/_markdown.less | 25 -- web_src/less/_repository.less | 59 +++ 20 files changed, 937 insertions(+), 118 deletions(-) create mode 100644 modules/csv/csv.go create mode 100644 modules/csv/csv_test.go create mode 100644 services/gitdiff/csv.go create mode 100644 services/gitdiff/csv_test.go create mode 100644 templates/repo/diff/csv_diff.tmpl diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index fe4fec7e9..1e0cd5f28 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -248,6 +248,10 @@ EVENT_SOURCE_UPDATE_TIME = 10s ; Whether to render SVG files as images. 
If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images. ENABLE_RENDER = true +[ui.csv] +; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit). +MAX_FILE_SIZE = 524288 + [markdown] ; Render soft line breaks as hard line breaks, which means a single newline character between ; paragraphs will cause a line break and adding trailing whitespace to paragraphs is not diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index e32112f02..255bfb2b9 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -198,6 +198,10 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `ENABLE_RENDER`: **true**: Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images. +### UI - CSV Files (`ui.csv`) + +- `MAX_FILE_SIZE`: **524288** (512kb): Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit). + ## Markdown (`markdown`) - `ENABLE_HARD_LINE_BREAK_IN_COMMENTS`: **true**: Render soft line breaks as hard line breaks in comments, which diff --git a/modules/csv/csv.go b/modules/csv/csv.go new file mode 100644 index 000000000..1aa78fdee --- /dev/null +++ b/modules/csv/csv.go @@ -0,0 +1,93 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bytes" + "encoding/csv" + "errors" + "regexp" + "strings" + + "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" +) + +var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) + +// CreateReader creates a csv.Reader with the given delimiter. 
+func CreateReader(rawBytes []byte, delimiter rune) *csv.Reader { + rd := csv.NewReader(bytes.NewReader(rawBytes)) + rd.Comma = delimiter + rd.TrimLeadingSpace = true + return rd +} + +// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader. +func CreateReaderAndGuessDelimiter(rawBytes []byte) *csv.Reader { + delimiter := guessDelimiter(rawBytes) + return CreateReader(rawBytes, delimiter) +} + +// guessDelimiter scores the input CSV data against delimiters, and returns the best match. +// Reads at most 10k bytes & 10 lines. +func guessDelimiter(data []byte) rune { + maxLines := 10 + maxBytes := util.Min(len(data), 1e4) + text := string(data[:maxBytes]) + text = quoteRegexp.ReplaceAllLiteralString(text, "") + lines := strings.SplitN(text, "\n", maxLines+1) + lines = lines[:util.Min(maxLines, len(lines))] + + delimiters := []rune{',', ';', '\t', '|', '@'} + bestDelim := delimiters[0] + bestScore := 0.0 + for _, delim := range delimiters { + score := scoreDelimiter(lines, delim) + if score > bestScore { + bestScore = score + bestDelim = delim + } + } + + return bestDelim +} + +// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV. +func scoreDelimiter(lines []string, delim rune) float64 { + countTotal := 0 + countLineMax := 0 + linesNotEqual := 0 + + for _, line := range lines { + if len(line) == 0 { + continue + } + + countLine := strings.Count(line, string(delim)) + countTotal += countLine + if countLine != countLineMax { + if countLineMax != 0 { + linesNotEqual++ + } + countLineMax = util.Max(countLine, countLineMax) + } + } + + return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) +} + +// FormatError converts csv errors into readable messages. 
+func FormatError(err error, locale translation.Locale) (string, error) { + var perr *csv.ParseError + if errors.As(err, &perr) { + if perr.Err == csv.ErrFieldCount { + return locale.Tr("repo.error.csv.invalid_field_count", perr.Line), nil + } + return locale.Tr("repo.error.csv.unexpected", perr.Line, perr.Column), nil + } + + return "", err +} diff --git a/modules/csv/csv_test.go b/modules/csv/csv_test.go new file mode 100644 index 000000000..3a7584e21 --- /dev/null +++ b/modules/csv/csv_test.go @@ -0,0 +1,40 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCreateReader(t *testing.T) { + rd := CreateReader([]byte{}, ',') + assert.Equal(t, ',', rd.Comma) +} + +func TestCreateReaderAndGuessDelimiter(t *testing.T) { + input := "a;b;c\n1;2;3\n4;5;6" + + rd := CreateReaderAndGuessDelimiter([]byte(input)) + assert.Equal(t, ';', rd.Comma) +} + +func TestGuessDelimiter(t *testing.T) { + var kases = map[string]rune{ + "a": ',', + "1,2": ',', + "1;2": ';', + "1\t2": '\t', + "1|2": '|', + "1,2,3;4,5,6;7,8,9\na;b;c": ';', + "\"1,2,3,4\";\"a\nb\"\nc;d": ';', + "
": ',', + } + + for k, v := range kases { + assert.EqualValues(t, guessDelimiter([]byte(k)), v) + } +} diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1e3acc9b4..430e759eb 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -6,24 +6,20 @@ package markup import ( "bytes" - "encoding/csv" "html" "io" - "regexp" - "strings" + "strconv" + "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" - "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/setting" ) -var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) - func init() { markup.RegisterParser(Parser{}) - } -// Parser implements markup.Parser for orgmode +// Parser implements markup.Parser for csv files type Parser struct { } @@ -38,11 +34,35 @@ func (Parser) Extensions() []string { } // Render implements markup.Parser -func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - rd := csv.NewReader(bytes.NewReader(rawBytes)) - rd.Comma = p.bestDelimiter(rawBytes) +func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { var tmpBlock bytes.Buffer - tmpBlock.WriteString(``) + + if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) { + tmpBlock.WriteString("
")
+		tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
+		tmpBlock.WriteString("
") + return tmpBlock.Bytes() + } + + rd := csv.CreateReaderAndGuessDelimiter(rawBytes) + + writeField := func(element, class, field string) { + tmpBlock.WriteString("<") + tmpBlock.WriteString(element) + if len(class) > 0 { + tmpBlock.WriteString(" class=\"") + tmpBlock.WriteString(class) + tmpBlock.WriteString("\"") + } + tmpBlock.WriteString(">") + tmpBlock.WriteString(html.EscapeString(field)) + tmpBlock.WriteString("") + } + + tmpBlock.WriteString(`
`) + row := 1 for { fields, err := rd.Read() if err == io.EOF { @@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin continue } tmpBlock.WriteString("") + element := "td" + if row == 1 { + element = "th" + } + writeField(element, "line-num", strconv.Itoa(row)) for _, field := range fields { - tmpBlock.WriteString("") + writeField(element, "", field) } tmpBlock.WriteString("") + + row++ } tmpBlock.WriteString("
") - tmpBlock.WriteString(html.EscapeString(field)) - tmpBlock.WriteString("
") return tmpBlock.Bytes() } - -// bestDelimiter scores the input CSV data against delimiters, and returns the best match. -// Reads at most 10k bytes & 10 lines. -func (p Parser) bestDelimiter(data []byte) rune { - maxLines := 10 - maxBytes := util.Min(len(data), 1e4) - text := string(data[:maxBytes]) - text = quoteRegexp.ReplaceAllLiteralString(text, "") - lines := strings.SplitN(text, "\n", maxLines+1) - lines = lines[:util.Min(maxLines, len(lines))] - - delimiters := []rune{',', ';', '\t', '|'} - bestDelim := delimiters[0] - bestScore := 0.0 - for _, delim := range delimiters { - score := p.scoreDelimiter(lines, delim) - if score > bestScore { - bestScore = score - bestDelim = delim - } - } - - return bestDelim -} - -// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV -func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { - countTotal := 0 - countLineMax := 0 - linesNotEqual := 0 - - for _, line := range lines { - if len(line) == 0 { - continue - } - - countLine := strings.Count(line, string(delim)) - countTotal += countLine - if countLine != countLineMax { - if countLineMax != 0 { - linesNotEqual++ - } - countLineMax = util.Max(countLine, countLineMax) - } - } - - return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) -} diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 4d4e0871e..5438ebdf5 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -13,14 +13,10 @@ import ( func TestRenderCSV(t *testing.T) { var parser Parser var kases = map[string]string{ - "a": "
a
", - "1,2": "
12
", - "1;2": "
12
", - "1\t2": "
12
", - "1|2": "
12
", - "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", - "\"1,2,3,4\";\"a\nb\"\nc;d": "
1,2,3,4a\nb
cd
", - "
": "
<br/>
", + "a": "
1a
", + "1,2": "
112
", + "1;2\n3;4": "
112
234
", + "
": "
1<br/>
", } for k, v := range kases { diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 9214a75fb..19feaa3cc 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -69,6 +69,10 @@ func ReplaceSanitizer() { // Allow icons, emojis, and chroma syntax on span sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span") + // Allow data tables + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table") + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td") + // Allow generally safe attributes generalSafeAttrs := []string{"abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 6a9868713..280987ed6 100644 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -213,6 +213,10 @@ var ( Enabled bool `ini:"ENABLE_RENDER"` } `ini:"ui.svg"` + CSV struct { + MaxFileSize int64 + } `ini:"ui.csv"` + Admin struct { UserPagingNum int RepoPagingNum int @@ -258,6 +262,11 @@ var ( }{ Enabled: true, }, + CSV: struct { + MaxFileSize int64 + }{ + MaxFileSize: 524288, + }, Admin: struct { UserPagingNum int RepoPagingNum int diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 3a8783930..3be209ffe 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1860,6 +1860,7 @@ diff.whitespace_ignore_at_eol = Ignore changes in whitespace at EOL diff.stats_desc = %d changed files with %d additions and %d deletions diff.stats_desc_file = %d changes: %d additions and %d deletions diff.bin = BIN +diff.bin_not_shown = Binary file not shown. 
diff.view_file = View File diff.file_before = Before diff.file_after = After @@ -1960,6 +1961,10 @@ topic.done = Done topic.count_prompt = You can not select more than 25 topics topic.format_prompt = Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long. +error.csv.too_large = Can't render this file because it is too large. +error.csv.unexpected = Can't render this file because it contains an unexpected character in line %d and column %d. +error.csv.invalid_field_count = Can't render this file because it has a wrong number of fields in line %d. + [org] org_name_holder = Organization Name org_full_name_holder = Organization Full Name diff --git a/routers/repo/commit.go b/routers/repo/commit.go index c3ee6b5ac..c06d09261 100644 --- a/routers/repo/commit.go +++ b/routers/repo/commit.go @@ -336,9 +336,8 @@ func Diff(ctx *context.Context) { return } } - setImageCompareContext(ctx, parentCommit, commit) headTarget := path.Join(userName, repoName) - setPathsCompareContext(ctx, parentCommit, commit, headTarget) + setCompareContext(ctx, parentCommit, commit, headTarget) ctx.Data["Title"] = commit.Summary() + " ยท " + base.ShortSha(commitID) ctx.Data["Commit"] = commit verification := models.ParseCommitWithSignature(commit) diff --git a/routers/repo/compare.go b/routers/repo/compare.go index 38c3005cf..0b7bdf764 100644 --- a/routers/repo/compare.go +++ b/routers/repo/compare.go @@ -6,14 +6,20 @@ package repo import ( "bufio" + "encoding/csv" + "errors" "fmt" "html" + "io/ioutil" "path" + "path/filepath" "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" + csv_module "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -26,6 +32,16 @@ const ( tplBlobExcerpt base.TplName = "repo/diff/blob_excerpt" ) +// setCompareContext sets context data. 
+func setCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) { + ctx.Data["BaseCommit"] = base + ctx.Data["HeadCommit"] = head + + setPathsCompareContext(ctx, base, head, headTarget) + setImageCompareContext(ctx, base, head) + setCsvCompareContext(ctx) +} + // setPathsCompareContext sets context data for source and raw paths func setPathsCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) { sourcePath := setting.AppSubURL + "/%s/src/commit/%s" @@ -65,6 +81,73 @@ func setImageCompareContext(ctx *context.Context, base *git.Commit, head *git.Co } } +// setCsvCompareContext sets context data that is required by the CSV compare template +func setCsvCompareContext(ctx *context.Context) { + ctx.Data["IsCsvFile"] = func(diffFile *gitdiff.DiffFile) bool { + extension := strings.ToLower(filepath.Ext(diffFile.Name)) + return extension == ".csv" || extension == ".tsv" + } + + type CsvDiffResult struct { + Sections []*gitdiff.TableDiffSection + Error string + } + + ctx.Data["CreateCsvDiff"] = func(diffFile *gitdiff.DiffFile, baseCommit *git.Commit, headCommit *git.Commit) CsvDiffResult { + if diffFile == nil || baseCommit == nil || headCommit == nil { + return CsvDiffResult{nil, ""} + } + + errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large")) + + csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) { + blob, err := c.GetBlobByPath(diffFile.Name) + if err != nil { + return nil, err + } + + if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() { + return nil, errTooLarge + } + + reader, err := blob.DataAsync() + if err != nil { + return nil, err + } + defer reader.Close() + + b, err := ioutil.ReadAll(reader) + if err != nil { + return nil, err + } + + b = charset.ToUTF8WithFallback(b) + + return csv_module.CreateReaderAndGuessDelimiter(b), nil + } + + baseReader, err := csvReaderFromCommit(baseCommit) + if err == errTooLarge { + return 
CsvDiffResult{nil, err.Error()} + } + headReader, err := csvReaderFromCommit(headCommit) + if err == errTooLarge { + return CsvDiffResult{nil, err.Error()} + } + + sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader) + if err != nil { + errMessage, err := csv_module.FormatError(err, ctx.Locale) + if err != nil { + log.Error("RenderCsvDiff failed: %v", err) + return CsvDiffResult{nil, ""} + } + return CsvDiffResult{nil, errMessage} + } + return CsvDiffResult{sections, ""} + } +} + // ParseCompareInfo parse compare info between two commit for preparing comparing references func ParseCompareInfo(ctx *context.Context) (*models.User, *models.Repository, *git.Repository, *git.CompareInfo, string, string) { baseRepo := ctx.Repo.Repository @@ -490,9 +573,8 @@ func PrepareCompareDiff( ctx.Data["Username"] = headUser.Name ctx.Data["Reponame"] = headRepo.Name - setImageCompareContext(ctx, baseCommit, headCommit) headTarget := path.Join(headUser.Name, repo.Name) - setPathsCompareContext(ctx, baseCommit, headCommit, headTarget) + setCompareContext(ctx, baseCommit, headCommit, headTarget) return false } diff --git a/routers/repo/pull.go b/routers/repo/pull.go index 2ed47605f..cc6841da4 100644 --- a/routers/repo/pull.go +++ b/routers/repo/pull.go @@ -591,7 +591,6 @@ func ViewPullFiles(ctx *context.Context) { gitRepo *git.Repository ) - var headTarget string var prInfo *git.CompareInfo if pull.HasMerged { prInfo = PrepareMergedViewPullInfo(ctx, issue) @@ -618,7 +617,6 @@ func ViewPullFiles(ctx *context.Context) { startCommitID = prInfo.MergeBase endCommitID = headCommitID - headTarget = path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name) ctx.Data["Username"] = ctx.Repo.Owner.Name ctx.Data["Reponame"] = ctx.Repo.Repository.Name ctx.Data["AfterCommitID"] = endCommitID @@ -672,8 +670,8 @@ func ViewPullFiles(ctx *context.Context) { } } - setImageCompareContext(ctx, baseCommit, commit) - setPathsCompareContext(ctx, baseCommit, commit, headTarget) + headTarget 
:= path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name) + setCompareContext(ctx, baseCommit, commit, headTarget) ctx.Data["RequireHighlightJS"] = true ctx.Data["RequireSimpleMDE"] = true diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go new file mode 100644 index 000000000..f4310d877 --- /dev/null +++ b/services/gitdiff/csv.go @@ -0,0 +1,379 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "encoding/csv" + "errors" + "io" + + "code.gitea.io/gitea/modules/util" +) + +const unmappedColumn = -1 +const maxRowsToInspect int = 10 +const minRatioToMatch float32 = 0.8 + +// TableDiffCellType represents the type of a TableDiffCell. +type TableDiffCellType uint8 + +// TableDiffCellType possible values. +const ( + TableDiffCellEqual TableDiffCellType = iota + 1 + TableDiffCellChanged + TableDiffCellAdd + TableDiffCellDel +) + +// TableDiffCell represents a cell of a TableDiffRow +type TableDiffCell struct { + LeftCell string + RightCell string + Type TableDiffCellType +} + +// TableDiffRow represents a row of a TableDiffSection. +type TableDiffRow struct { + RowIdx int + Cells []*TableDiffCell +} + +// TableDiffSection represents a section of a DiffFile. +type TableDiffSection struct { + Rows []*TableDiffRow +} + +// csvReader wraps a csv.Reader which buffers the first rows. 
+type csvReader struct { + reader *csv.Reader + buffer [][]string + line int + eof bool +} + +// createCsvReader creates a csvReader and fills the buffer +func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) { + csv := &csvReader{reader: reader} + csv.buffer = make([][]string, bufferRowCount) + for i := 0; i < bufferRowCount && !csv.eof; i++ { + row, err := csv.readNextRow() + if err != nil { + return nil, err + } + csv.buffer[i] = row + } + csv.line = bufferRowCount + return csv, nil +} + +// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned. +func (csv *csvReader) GetRow(row int) ([]string, error) { + if row < len(csv.buffer) { + return csv.buffer[row], nil + } + if csv.eof { + return nil, nil + } + for { + fields, err := csv.readNextRow() + if err != nil { + return nil, err + } + if csv.eof { + return nil, nil + } + csv.line++ + if csv.line-1 == row { + return fields, nil + } + } +} + +func (csv *csvReader) readNextRow() ([]string, error) { + if csv.eof { + return nil, nil + } + row, err := csv.reader.Read() + if err != nil { + if err != io.EOF { + return nil, err + } + csv.eof = true + } + return row, nil +} + +// CreateCsvDiff creates a tabular diff based on two CSV readers. +func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { + if baseReader != nil && headReader != nil { + return createCsvDiff(diffFile, baseReader, headReader) + } + + if baseReader != nil { + return createCsvDiffSingle(baseReader, TableDiffCellDel) + } + return createCsvDiffSingle(headReader, TableDiffCellAdd) +} + +// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. 
+func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) { + var rows []*TableDiffRow + i := 1 + for { + row, err := reader.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + cells := make([]*TableDiffCell, len(row)) + for j := 0; j < len(row); j++ { + cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} + } + rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells}) + i++ + } + + return []*TableDiffSection{{Rows: rows}}, nil +} + +func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { + a, err := createCsvReader(baseReader, maxRowsToInspect) + if err != nil { + return nil, err + } + + b, err := createCsvReader(headReader, maxRowsToInspect) + if err != nil { + return nil, err + } + + a2b, b2a := getColumnMapping(a, b) + + columns := len(a2b) + countUnmappedColumns(b2a) + if len(a2b) < len(b2a) { + columns = len(b2a) + countUnmappedColumns(a2b) + } + + createDiffRow := func(aline int, bline int) (*TableDiffRow, error) { + cells := make([]*TableDiffCell, columns) + + if aline == 0 || bline == 0 { + var ( + row []string + celltype TableDiffCellType + err error + ) + if bline == 0 { + row, err = a.GetRow(aline - 1) + celltype = TableDiffCellDel + } else { + row, err = b.GetRow(bline - 1) + celltype = TableDiffCellAdd + } + if err != nil { + return nil, err + } + if row == nil { + return nil, nil + } + for i := 0; i < len(row); i++ { + cells[i] = &TableDiffCell{LeftCell: row[i], Type: celltype} + } + return &TableDiffRow{RowIdx: bline, Cells: cells}, nil + } + + arow, err := a.GetRow(aline - 1) + if err != nil { + return nil, err + } + brow, err := b.GetRow(bline - 1) + if err != nil { + return nil, err + } + if len(arow) == 0 && len(brow) == 0 { + return nil, nil + } + + for i := 0; i < len(a2b); i++ { + acell, _ := getCell(arow, i) + if a2b[i] == unmappedColumn { + cells[i] = &TableDiffCell{LeftCell: acell, Type: 
TableDiffCellDel} + } else { + bcell, _ := getCell(brow, a2b[i]) + + celltype := TableDiffCellChanged + if acell == bcell { + celltype = TableDiffCellEqual + } + + cells[i] = &TableDiffCell{LeftCell: acell, RightCell: bcell, Type: celltype} + } + } + for i := 0; i < len(b2a); i++ { + if b2a[i] == unmappedColumn { + bcell, _ := getCell(brow, i) + cells[i] = &TableDiffCell{LeftCell: bcell, Type: TableDiffCellAdd} + } + } + + return &TableDiffRow{RowIdx: bline, Cells: cells}, nil + } + + var sections []*TableDiffSection + + for i, section := range diffFile.Sections { + var rows []*TableDiffRow + lines := tryMergeLines(section.Lines) + for j, line := range lines { + if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) { + diffRow, err := createDiffRow(1, 1) + if err != nil { + return nil, err + } + if diffRow != nil { + rows = append(rows, diffRow) + } + } + diffRow, err := createDiffRow(line[0], line[1]) + if err != nil { + return nil, err + } + if diffRow != nil { + rows = append(rows, diffRow) + } + } + + if len(rows) > 0 { + sections = append(sections, &TableDiffSection{Rows: rows}) + } + } + + return sections, nil +} + +// getColumnMapping creates a mapping of columns between a and b +func getColumnMapping(a *csvReader, b *csvReader) ([]int, []int) { + arow, _ := a.GetRow(0) + brow, _ := b.GetRow(0) + + a2b := []int{} + b2a := []int{} + + if arow != nil { + a2b = make([]int, len(arow)) + } + if brow != nil { + b2a = make([]int, len(brow)) + } + + for i := 0; i < len(b2a); i++ { + b2a[i] = unmappedColumn + } + + bcol := 0 + for i := 0; i < len(a2b); i++ { + a2b[i] = unmappedColumn + + acell, ea := getCell(arow, i) + if ea == nil { + for j := bcol; j < len(b2a); j++ { + bcell, eb := getCell(brow, j) + if eb == nil && acell == bcell { + a2b[i] = j + b2a[j] = i + bcol = j + 1 + break + } + } + } + } + + tryMapColumnsByContent(a, a2b, b, b2a) + tryMapColumnsByContent(b, b2a, a, a2b) + + return a2b, b2a +} + +// tryMapColumnsByContent tries to map missing columns by 
the content of the first lines. +func tryMapColumnsByContent(a *csvReader, a2b []int, b *csvReader, b2a []int) { + start := 0 + for i := 0; i < len(a2b); i++ { + if a2b[i] == unmappedColumn { + if b2a[start] == unmappedColumn { + rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(a.buffer), len(b.buffer))-1)) + same := 0 + for j := 1; j <= rows; j++ { + acell, ea := getCell(a.buffer[j], i) + bcell, eb := getCell(b.buffer[j], start+1) + if ea == nil && eb == nil && acell == bcell { + same++ + } + } + if (float32(same) / float32(rows)) > minRatioToMatch { + a2b[i] = start + 1 + b2a[start+1] = i + } + } + } + start = a2b[i] + } +} + +// getCell returns the specific cell or nil if not present. +func getCell(row []string, column int) (string, error) { + if column < len(row) { + return row[column], nil + } + return "", errors.New("Undefined column") +} + +// countUnmappedColumns returns the count of unmapped columns. +func countUnmappedColumns(mapping []int) int { + count := 0 + for i := 0; i < len(mapping); i++ { + if mapping[i] == unmappedColumn { + count++ + } + } + return count +} + +// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered. +func tryMergeLines(lines []*DiffLine) [][2]int { + ids := make([][2]int, len(lines)) + + i := 0 + for _, line := range lines { + if line.Type != DiffLineSection { + ids[i][0] = line.LeftIdx + ids[i][1] = line.RightIdx + i++ + } + } + + ids = ids[:i] + + result := make([][2]int, len(ids)) + + j := 0 + for i = 0; i < len(ids); i++ { + if ids[i][0] == 0 { + if j > 0 && result[j-1][1] == 0 { + temp := j + for temp > 0 && result[temp-1][1] == 0 { + temp-- + } + result[temp][1] = ids[i][1] + continue + } + } + result[j] = ids[i] + j++ + } + + return result[:j] +} diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go new file mode 100644 index 000000000..17edea582 --- /dev/null +++ b/services/gitdiff/csv_test.go @@ -0,0 +1,119 @@ +// Copyright 2021 The Gitea Authors. 
All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "encoding/csv" + "strings" + "testing" + + csv_module "code.gitea.io/gitea/modules/csv" + "code.gitea.io/gitea/modules/setting" + "github.com/stretchr/testify/assert" +) + +func TestCSVDiff(t *testing.T) { + var cases = []struct { + diff string + base string + head string + cells [][2]TableDiffCellType + }{ + // case 0 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -0,0 +1,2 @@ ++col1,col2 ++a,a`, + base: "", + head: "col1,col2\na,a", + cells: [][2]TableDiffCellType{{TableDiffCellAdd, TableDiffCellAdd}, {TableDiffCellAdd, TableDiffCellAdd}}, + }, + // case 1 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,3 @@ + col1,col2 +-a,a ++a,a ++b,b`, + base: "col1,col2\na,a", + head: "col1,col2\na,a\nb,b", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellAdd, TableDiffCellAdd}}, + }, + // case 2 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,3 +1,2 @@ + col1,col2 +-a,a + b,b`, + base: "col1,col2\na,a\nb,b", + head: "col1,col2\nb,b", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellDel, TableDiffCellDel}, {TableDiffCellEqual, TableDiffCellEqual}}, + }, + // case 3 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ + col1,col2 +-b,b ++b,c`, + base: "col1,col2\nb,b", + head: "col1,col2\nb,c", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellChanged}}, + }, + // case 4 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +0,0 @@ +-col1,col2 +-b,c`, + base: "col1,col2\nb,c", + 
head: "", + cells: [][2]TableDiffCellType{{TableDiffCellDel, TableDiffCellDel}, {TableDiffCellDel, TableDiffCellDel}}, + }, + } + + for n, c := range cases { + diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff)) + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + var baseReader *csv.Reader + if len(c.base) > 0 { + baseReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.base)) + } + var headReader *csv.Reader + if len(c.head) > 0 { + headReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.head)) + } + + result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "case %d: should be one section", n) + + section := result[0] + assert.Equal(t, len(c.cells), len(section.Rows), "case %d: should be %d rows", n, len(c.cells)) + + for i, row := range section.Rows { + assert.Equal(t, 2, len(row.Cells), "case %d: row %d should have two cells", n, i) + for j, cell := range row.Cells { + assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j) + } + } + } +} diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index f93dac9b4..368fc2c44 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -79,6 +79,8 @@ {{else}} {{$isImage = (call $.IsImageFileInHead $file.Name)}} {{end}} + {{$isCsv := (call $.IsCsvFile $file)}} + {{$showFileViewToggle := or $isImage $isCsv}} {{svg "octicon-chevron-down" 18}} @@ -92,6 +94,12 @@ {{if $file.IsRenamed}}{{$file.OldName}} → {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}
+ {{if $showFileViewToggle}} +
+ {{svg "octicon-code"}} + {{svg "octicon-file"}} +
+ {{end}} {{if $file.IsProtected}} {{$.i18n.Tr "repo.diff.protected"}} {{end}} @@ -106,21 +114,30 @@
{{if ne $file.Type 4}} -
- - - {{if $isImage}} - {{template "repo/diff/image_diff" dict "file" . "root" $}} - {{else}} - {{if $.IsSplitStyle}} - {{template "repo/diff/section_split" dict "file" . "root" $}} - {{else}} - {{template "repo/diff/section_unified" dict "file" . "root" $}} - {{end}} - {{end}} - +
+ {{if $file.IsBin}} +
{{$.i18n.Tr "repo.diff.bin_not_shown"}}
+ {{else}} +
+ {{if $.IsSplitStyle}} + {{template "repo/diff/section_split" dict "file" . "root" $}} + {{else}} + {{template "repo/diff/section_unified" dict "file" . "root" $}} + {{end}}
+ {{end}}
+ {{if or $isImage $isCsv}} +
+ + {{if $isImage}} + {{template "repo/diff/image_diff" dict "file" . "root" $}} + {{else}} + {{template "repo/diff/csv_diff" dict "file" . "root" $}} + {{end}} +
+
+ {{end}} {{end}}
diff --git a/templates/repo/diff/csv_diff.tmpl b/templates/repo/diff/csv_diff.tmpl new file mode 100644 index 000000000..c4af70461 --- /dev/null +++ b/templates/repo/diff/csv_diff.tmpl @@ -0,0 +1,46 @@ + + + {{$result := call .root.CreateCsvDiff .file .root.BaseCommit .root.HeadCommit}} + {{if $result.Error}} +
{{$result.Error}}
+ {{else if $result.Sections}} + + {{range $i, $section := $result.Sections}} + + {{range $j, $row := $section.Rows}} + + {{if and (eq $i 0) (eq $j 0)}} + + {{range $j, $cell := $row.Cells}} + {{if eq $cell.Type 2}} + + {{else if eq $cell.Type 3}} + + {{else if eq $cell.Type 4}} + + {{else}} + + {{end}} + {{end}} + {{else}} + + {{range $j, $cell := $row.Cells}} + {{if eq $cell.Type 2}} + + {{else if eq $cell.Type 3}} + + {{else if eq $cell.Type 4}} + + {{else}} + + {{end}} + {{end}} + {{end}} + + {{end}} + + {{end}} +
{{.RowIdx}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}{{if .RowIdx}}{{.RowIdx}}{{end}}{{.LeftCell}} {{.RightCell}}{{.LeftCell}}{{.LeftCell}}{{.RightCell}}
+ {{end}} + + \ No newline at end of file diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index be7d65177..76c0ac76f 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -64,7 +64,7 @@ {{end}}
-
+
// Binds the ".file-view-toggle" buttons. A click marks the pressed button as
// the only active one among its siblings and reveals the element addressed by
// the button's "toggle-selector" data value, hiding that element's siblings.
function initFileViewToggle() {
  const $toggles = $('.file-view-toggle');

  $toggles.on('click', (event) => {
    const $button = $(event.currentTarget);

    // Move the "active" marker to the clicked button.
    const $buttonGroup = $button.parent().children();
    $buttonGroup.removeClass('active');
    $button.addClass('active');

    // Hide every view in the target's container, then show the target again.
    const selector = $button.data('toggle-selector');
    const $view = $(selector);
    $view.parent().children().addClass('hide');
    $view.removeClass('hide');
  });
}
border: 1px solid var(--color-secondary); + } + + td { + white-space: pre-line; + } + + th { + font-weight: 600; + background: var(--color-box-header); + border-top: 0; + } + + td.added, + th.added, + tr.added { + background-color: var(--color-diff-added-row-bg) !important; + } + + td.removed, + th.removed, + tr.removed { + background-color: var(--color-diff-removed-row-bg) !important; + } + + tbody.section { + border-top: 2px solid var(--color-secondary); + } + + .line-num { + width: 1%; + min-width: 50px; + font-family: monospace; + line-height: 20px; + color: var(--color-secondary-dark-2); + white-space: nowrap; + vertical-align: top; + cursor: pointer; + user-select: none; + text-align: right; + background: var(--color-body); + border: 0; + } + } + .diff-detail-box { padding: 7px 0; background: var(--color-body);