@ -441,91 +441,252 @@ func (diff *Diff) LoadComments(issue *models.Issue, currentUser *models.User) er
const cmdDiffHead = "diff --git "
// ParsePatch builds a Diff object from a io.Reader and some
// parameters.
// TODO: move this function to gogits/git-module
// ParsePatch builds a Diff object from a io.Reader and some parameters.
func ParsePatch ( maxLines , maxLineCharacters , maxFiles int , reader io . Reader ) ( * Diff , error ) {
var (
diff = & Diff { Files : make ( [ ] * DiffFile , 0 ) }
curFile = & DiffFile { }
curSection = & DiffSection {
Lines : make ( [ ] * DiffLine , 0 , 10 ) ,
var curFile * DiffFile
diff := & Diff { Files : make ( [ ] * DiffFile , 0 ) }
sb := strings . Builder { }
// OK let's set a reasonable buffer size.
// This should be let's say at least the size of maxLineCharacters or 4096 whichever is larger.
readerSize := maxLineCharacters
if readerSize < 4096 {
readerSize = 4096
}
leftLine , rightLine int
lineCount int
curFileLinesCount int
curFileLFSPrefix bool
)
input := bufio . NewReaderSize ( reader , readerSize )
line , err := input . ReadString ( '\n' )
if err != nil {
if err == io . EOF {
return diff , nil
}
return diff , err
}
parsingLoop :
for {
// 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
// if it does not we have bad input!
if ! strings . HasPrefix ( line , cmdDiffHead ) {
return diff , fmt . Errorf ( "Invalid first file line: %s" , line )
}
input := bufio . NewReader ( reader )
isEOF := false
for ! isEOF {
var linebuf bytes . Buffer
// TODO: Handle skipping first n files
if len ( diff . Files ) >= maxFiles {
diff . IsIncomplete = true
_ , err := io . Copy ( ioutil . Discard , reader )
if err != nil {
// By the definition of io.Copy this never returns io.EOF
return diff , fmt . Errorf ( "Copy: %v" , err )
}
break parsingLoop
}
curFile = createDiffFile ( diff , line )
diff . Files = append ( diff . Files , curFile )
// 2. It is followed by one or more extended header lines:
//
// old mode <mode>
// new mode <mode>
// deleted file mode <mode>
// new file mode <mode>
// copy from <path>
// copy to <path>
// rename from <path>
// rename to <path>
// similarity index <number>
// dissimilarity index <number>
// index <hash>..<hash> <mode>
//
// * <mode> 6-digit octal numbers including the file type and file permission bits.
// * <path> does not include the a/ and b/ prefixes
// * <number> percentage of unchanged lines for similarity, percentage of changed
// lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
// * The index line includes the blob object names before and after the change.
// The <mode> is included if the file mode does not change; otherwise, separate
// lines indicate the old and the new mode.
// 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
//
// --- a/<path>
// +++ b/<path>
//
// With multiple hunks
//
// @@ <hunk descriptor> @@
// +added line
// -removed line
// unchanged line
//
// 4. Binary files get:
//
// Binary files a/<path> and b/<path> differ
//
// but one of a/<path> and b/<path> could be /dev/null.
curFileLoop :
for {
b , err := input . ReadByte ( )
line , err = input . ReadString ( '\n' )
if err != nil {
if err == io . EOF {
isEOF = true
break
} else {
return nil , fmt . Errorf ( "ReadByte: %v" , err )
if err != io . EOF {
return diff , err
}
break parsingLoop
}
switch {
case strings . HasPrefix ( line , "old mode " ) ||
strings . HasPrefix ( line , "new mode " ) :
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "copy from " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
case strings . HasPrefix ( line , "copy to " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
case strings . HasPrefix ( line , "new file" ) :
curFile . Type = DiffFileAdd
curFile . IsCreated = true
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
if b == '\n' {
break
case strings . HasPrefix ( line , "deleted" ) :
curFile . Type = DiffFileDel
curFile . IsDeleted = true
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
if linebuf . Len ( ) < maxLineCharacters {
linebuf . WriteByte ( b )
} else if linebuf . Len ( ) == maxLineCharacters {
curFile . IsIncomplete = true
case strings . HasPrefix ( line , "index" ) :
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "similarity index 100%" ) :
curFile . Type = DiffFileRename
case strings . HasPrefix ( line , "Binary" ) :
curFile . IsBin = true
case strings . HasPrefix ( line , "--- " ) :
// Do nothing with this line
case strings . HasPrefix ( line , "+++ " ) :
// Do nothing with this line
lineBytes , isFragment , err := parseHunks ( curFile , maxLines , maxLineCharacters , input )
diff . TotalAddition += curFile . Addition
diff . TotalDeletion += curFile . Deletion
if err != nil {
if err != io . EOF {
return diff , err
}
break parsingLoop
}
sb . Reset ( )
_ , _ = sb . Write ( lineBytes )
for isFragment {
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
return diff , fmt . Errorf ( "Unable to ReadLine: %v" , err )
}
_ , _ = sb . Write ( lineBytes )
}
line := linebuf . String ( )
line = sb . String ( )
sb . Reset ( )
if strings . HasPrefix ( line , "+++ " ) || strings . HasPrefix ( line , "--- " ) || len ( line ) == 0 {
continue
break curFileLoop
}
}
trimLine := strings . Trim ( line , "+- " )
}
if trimLine == models . LFSMetaFileIdentifier {
curFileLFSPrefix = true
// FIXME: There are numerous issues with this:
// - we might want to consider detecting encoding while parsing but...
// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
// - and this doesn't really account for changes in encoding
var buf bytes . Buffer
for _ , f := range diff . Files {
buf . Reset ( )
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
if l . Type == DiffLineSection {
continue
}
buf . WriteString ( l . Content [ 1 : ] )
buf . WriteString ( "\n" )
}
}
charsetLabel , err := charset . DetectEncoding ( buf . Bytes ( ) )
if charsetLabel != "UTF-8" && err == nil {
encoding , _ := stdcharset . Lookup ( charsetLabel )
if encoding != nil {
d := encoding . NewDecoder ( )
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
if l . Type == DiffLineSection {
continue
}
if c , _ , err := transform . String ( d , l . Content [ 1 : ] ) ; err == nil {
l . Content = l . Content [ 0 : 1 ] + c
}
}
}
}
}
}
if curFileLFSPrefix && strings . HasPrefix ( trimLine , models . LFSMetaFileOidPrefix ) {
oid := strings . TrimPrefix ( trimLine , models . LFSMetaFileOidPrefix )
diff . NumFiles = len ( diff . Files )
return diff , nil
}
if len ( oid ) == 64 {
m := & models . LFSMetaObject { Oid : oid }
count , err := models . Count ( m )
func parseHunks ( curFile * DiffFile , maxLines , maxLineCharacters int , input * bufio . Reader ) ( lineBytes [ ] byte , isFragment bool , err error ) {
sb := strings . Builder { }
if err == nil && count > 0 {
curFile . IsBin = true
curFile . IsLFSFile = true
curSection . Lines = nil
var (
curSection * DiffSection
curFileLinesCount int
curFileLFSPrefix bool
)
leftLine , rightLine := 1 , 1
for {
sb . Reset ( )
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
if err == io . EOF {
return
}
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
}
if lineBytes [ 0 ] == 'd' {
// End of hunks
return
}
curFileLinesCount ++
lineCount ++
// Diff data too large, we only show the first about maxLines lines
switch lineBytes [ 0 ] {
case '@' :
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
}
switch {
case line [ 0 ] == ' ' :
diffLine := & DiffLine { Type : DiffLinePlain , Content : line , LeftIdx : leftLine , RightIdx : rightLine }
leftLine ++
rightLine ++
curSection . Lines = append ( curSection . Lines , diffLine )
curSection . FileName = curFile . Name
continue
case line [ 0 ] == '@' :
}
_ , _ = sb . Write ( lineBytes )
for isFragment {
// This is very odd indeed - we're in a section header and the line is too long
// This really shouldn't happen...
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
}
_ , _ = sb . Write ( lineBytes )
}
line := sb . String ( )
// Create a new section to represent this hunk
curSection = & DiffSection { }
curFile . Sections = append ( curFile . Sections , curSection )
lineSectionInfo := getDiffLineSectionInfo ( curFile . Name , line , leftLine - 1 , rightLine - 1 )
diffLine := & DiffLine {
Type : DiffLineSection ,
@ -538,148 +699,132 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
leftLine = lineSectionInfo . LeftIdx
rightLine = lineSectionInfo . RightIdx
continue
case line [ 0 ] == '+' :
case '\\' :
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
// This is used only to indicate that the current file does not have a terminal newline
if ! bytes . Equal ( lineBytes , [ ] byte ( "\\ No newline at end of file" ) ) {
err = fmt . Errorf ( "Unexpected line in hunk: %s" , string ( lineBytes ) )
return
}
// Technically this should be the end the file!
// FIXME: we should be putting a marker at the end of the file if there is no terminal new line
continue
case '+' :
curFileLinesCount ++
curFile . Addition ++
diff . TotalAddition ++
diffLine := & DiffLine { Type : DiffLineAdd , Content : line , RightIdx : rightLine }
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLineAdd , RightIdx : rightLine }
rightLine ++
curSection . Lines = append ( curSection . Lines , diffLine )
curSection . FileName = curFile . Name
continue
case line [ 0 ] == '-' :
case '-' :
curFileLinesCount ++
curFile . Deletion ++
diff . TotalDeletion ++
diffLine := & DiffLine { Type : DiffLineDel , Content : line , LeftIdx : leftLine }
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLineDel , LeftIdx : leftLine }
if leftLine > 0 {
leftLine ++
}
curSection . Lines = append ( curSection . Lines , diffLine )
curSection . FileName = curFile . Name
case strings . HasPrefix ( line , "Binary" ) :
curFile . IsBin = true
case ' ' :
curFileLinesCount ++
if curFileLinesCount >= maxLines {
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLinePlain , LeftIdx : leftLine , RightIdx : rightLine }
leftLine ++
rightLine ++
curSection . Lines = append ( curSection . Lines , diffLine )
default :
// This is unexpected
err = fmt . Errorf ( "Unexpected line in hunk: %s" , string ( lineBytes ) )
return
}
// Get new file.
if strings . HasPrefix ( line , cmdDiffHead ) {
if len ( diff . Files ) >= maxFiles {
diff . IsIncomplete = true
_ , err := io . Copy ( ioutil . Discard , reader )
line := string ( lineBytes )
if isFragment {
curFile . IsIncomplete = true
for isFragment {
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
return nil , fmt . Errorf ( "Copy: %v" , err )
// Now by the definition of ReadLine this cannot be io.EOF
err = fmt . Errorf ( "Unable to ReadLine: %v" , err )
return
}
break
}
}
curSection . Lines [ len ( curSection . Lines ) - 1 ] . Content = line
// Note: In case file name is surrounded by double quotes (it happens only in git-shell).
// e.g. diff --git "a/xxx" "b/xxx"
var a string
var b string
// handle LFS
if line [ 1 : ] == models . LFSMetaFileIdentifier {
curFileLFSPrefix = true
} else if curFileLFSPrefix && strings . HasPrefix ( line [ 1 : ] , models . LFSMetaFileOidPrefix ) {
oid := strings . TrimPrefix ( line [ 1 : ] , models . LFSMetaFileOidPrefix )
if len ( oid ) == 64 {
m := & models . LFSMetaObject { Oid : oid }
count , err := models . Count ( m )
rd := strings . NewReader ( line [ len ( cmdDiffHead ) : ] )
char , _ := rd . ReadByte ( )
_ = rd . UnreadByte ( )
if char == '"' {
fmt . Fscanf ( rd , "%q " , & a )
if a [ 0 ] == '\\' {
a = a [ 1 : ]
if err == nil && count > 0 {
curFile . IsBin = true
curFile . IsLFSFile = true
curSection . Lines = nil
}
} else {
fmt . Fscanf ( rd , "%s " , & a )
}
char , _ = rd . ReadByte ( )
_ = rd . UnreadByte ( )
if char == '"' {
fmt . Fscanf ( rd , "%q" , & b )
if b [ 0 ] == '\\' {
b = b [ 1 : ]
}
} else {
fmt . Fscanf ( rd , "%s" , & b )
}
a = a [ 2 : ]
b = b [ 2 : ]
}
curFile = & DiffFile {
Name : b ,
OldName : a ,
func createDiffFile ( diff * Diff , line string ) * DiffFile {
// The a/ and b/ filenames are the same unless rename/copy is involved.
// Especially, even for a creation or a deletion, /dev/null is not used
// in place of the a/ or b/ filenames.
//
// When rename/copy is involved, file1 and file2 show the name of the
// source file of the rename/copy and the name of the file that rename/copy
// produces, respectively.
//
// Path names are quoted if necessary.
//
// This means that you should always be able to determine the file name even when there
// there is potential ambiguity...
//
// but we can be simpler with our heuristics by just forcing git to prefix things nicely
curFile := & DiffFile {
Index : len ( diff . Files ) + 1 ,
Type : DiffFileChange ,
Sections : make ( [ ] * DiffSection , 0 , 10 ) ,
IsRenamed : a != b ,
}
diff . Files = append ( diff . Files , curFile )
curFileLinesCount = 0
leftLine = 1
rightLine = 1
curFileLFSPrefix = false
// Check file diff type and is submodule.
for {
line , err := input . ReadString ( '\n' )
if err != nil {
if err == io . EOF {
isEOF = true
} else {
return nil , fmt . Errorf ( "ReadString: %v" , err )
}
}
switch {
case strings . HasPrefix ( line , "copy from " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
case strings . HasPrefix ( line , "copy to " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
case strings . HasPrefix ( line , "new file" ) :
curFile . Type = DiffFileAdd
curFile . IsCreated = true
case strings . HasPrefix ( line , "deleted" ) :
curFile . Type = DiffFileDel
curFile . IsDeleted = true
case strings . HasPrefix ( line , "index" ) :
rd := strings . NewReader ( line [ len ( cmdDiffHead ) : ] + " " )
curFile . Type = DiffFileChange
case strings . HasPrefix ( line , "similarity index 100%" ) :
curFile . Type = DiffFileRename
}
if curFile . Type > 0 {
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
break
}
}
}
}
curFile . OldName = readFileName ( rd )
curFile . Name = readFileName ( rd )
curFile . IsRenamed = curFile . Name != curFile . OldName
return curFile
}
// FIXME: detect encoding while parsing.
var buf bytes . Buffer
for _ , f := range diff . Files {
buf . Reset ( )
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
buf . WriteString ( l . Content )
buf . WriteString ( "\n" )
}
}
charsetLabel , err := charset . DetectEncoding ( buf . Bytes ( ) )
if charsetLabel != "UTF-8" && err == nil {
encoding , _ := stdcharset . Lookup ( charsetLabel )
if encoding != nil {
d := encoding . NewDecoder ( )
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
if c , _ , err := transform . String ( d , l . Content ) ; err == nil {
l . Content = c
}
}
}
}
func readFileName ( rd * strings . Reader ) string {
var name string
char , _ := rd . ReadByte ( )
_ = rd . UnreadByte ( )
if char == '"' {
fmt . Fscanf ( rd , "%q " , & name )
if name [ 0 ] == '\\' {
name = name [ 1 : ]
}
} else {
fmt . Fscanf ( rd , "%s " , & name )
}
diff . NumFiles = len ( diff . Files )
return diff , nil
return name [ 2 : ]
}
// GetDiffRange builds a Diff between two commits of a repository.