@ -5,325 +5,237 @@
package git
import (
"bufio"
"context"
"fmt"
"os/exec"
"path"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/emirpasic/gods/trees/binaryheap"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
const (
// parameters for searching for commit infos. If the untargeted search has
// not found any entries in the past 5 commits, and 12 or fewer entries
// remain, then we'll just let the targeted-searching threads finish off,
// and stop the untargeted search to not interfere.
deferToTargetedSearchColdStreak = 5
deferToTargetedSearchNumRemainingEntries = 12
)
// getCommitsInfoState shared state while getting commit info for entries
type getCommitsInfoState struct {
lock sync . Mutex
/* read-only fields, can be read without the mutex */
// entries and entryPaths are read-only after initialization, so they can
// safely be read without the mutex
entries [ ] * TreeEntry
// set of filepaths to get info for
entryPaths map [ string ] struct { }
treePath string
headCommit * Commit
/* mutable fields, must hold mutex to read or write */
// map from filepath to commit
commits map [ string ] * Commit
// set of filepaths that have been or are being searched for in a target search
targetedPaths map [ string ] struct { }
}
func ( state * getCommitsInfoState ) numRemainingEntries ( ) int {
state . lock . Lock ( )
defer state . lock . Unlock ( )
return len ( state . entries ) - len ( state . commits )
}
// getTargetEntryPath Returns the next path for a targeted-searching thread to
// search for, or returns the empty string if nothing left to search for
func ( state * getCommitsInfoState ) getTargetedEntryPath ( ) string {
var targetedEntryPath string
state . lock . Lock ( )
defer state . lock . Unlock ( )
for _ , entry := range state . entries {
entryPath := path . Join ( state . treePath , entry . Name ( ) )
if _ , ok := state . commits [ entryPath ] ; ok {
continue
} else if _ , ok = state . targetedPaths [ entryPath ] ; ok {
continue
}
targetedEntryPath = entryPath
state . targetedPaths [ entryPath ] = struct { } { }
break
// GetCommitsInfo gets information of all commits that are corresponding to these entries
func ( tes Entries ) GetCommitsInfo ( commit * Commit , treePath string , cache LastCommitCache ) ( [ ] [ ] interface { } , * Commit , error ) {
entryPaths := make ( [ ] string , len ( tes ) + 1 )
// Get the commit for the treePath itself
entryPaths [ 0 ] = ""
for i , entry := range tes {
entryPaths [ i + 1 ] = entry . Name ( )
}
return targetedEntryPath
}
// repeatedly perform targeted searches for unpopulated entries
func targetedSearch ( state * getCommitsInfoState , done chan error , cache LastCommitCache ) {
for {
entryPath := state . getTargetedEntryPath ( )
if len ( entryPath ) == 0 {
done <- nil
return
}
if cache != nil {
commit , err := cache . Get ( state . headCommit . repo . Path , state . headCommit . ID . String ( ) , entryPath )
if err == nil && commit != nil {
state . update ( entryPath , commit )
continue
}
}
command := NewCommand ( "rev-list" , "-1" , state . headCommit . ID . String ( ) , "--" , entryPath )
output , err := command . RunInDir ( state . headCommit . repo . Path )
if err != nil {
done <- err
return
}
id , err := NewIDFromString ( strings . TrimSpace ( output ) )
if err != nil {
done <- err
return
}
commit , err := state . headCommit . repo . getCommit ( id )
c , err := commit . repo . gogitRepo . CommitObject ( plumbing . Hash ( commit . ID ) )
if err != nil {
done <- err
return
return nil , nil , err
}
state . update ( entryPath , commit )
if cache != nil {
cache . Put ( state . headCommit . repo . Path , state . headCommit . ID . String ( ) , entryPath , commit )
}
}
}
func initGetCommitInfoState ( entries Entries , headCommit * Commit , treePath string ) * getCommitsInfoState {
entryPaths := make ( map [ string ] struct { } , len ( entries ) )
for _ , entry := range entries {
entryPaths [ path . Join ( treePath , entry . Name ( ) ) ] = struct { } { }
}
if treePath = path . Clean ( treePath ) ; treePath == "." {
treePath = ""
}
return & getCommitsInfoState {
entries : entries ,
entryPaths : entryPaths ,
commits : make ( map [ string ] * Commit , len ( entries ) ) ,
targetedPaths : make ( map [ string ] struct { } , len ( entries ) ) ,
treePath : treePath ,
headCommit : headCommit ,
revs , err := getLastCommitForPaths ( c , treePath , entryPaths )
if err != nil {
return nil , nil , err
}
}
// GetCommitsInfo gets information of all commits that are corresponding to these entries
func ( tes Entries ) GetCommitsInfo ( commit * Commit , treePath string , cache LastCommitCache ) ( [ ] [ ] interface { } , error ) {
state := initGetCommitInfoState ( tes , commit , treePath )
if err := getCommitsInfo ( state , cache ) ; err != nil {
return nil , err
}
if len ( state . commits ) < len ( state . entryPaths ) {
return nil , fmt . Errorf ( "could not find commits for all entries" )
}
commit . repo . gogitStorage . Close ( )
commitsInfo := make ( [ ] [ ] interface { } , len ( tes ) )
for i , entry := range tes {
commit , ok := state . commits [ path . Join ( treePath , entry . Name ( ) ) ]
if ! ok {
return nil , fmt . Errorf ( "could not find commit for %s" , entry . Name ( ) )
}
switch entry . Type {
case ObjectCommit :
if rev , ok := revs [ entry . Name ( ) ] ; ok {
entryCommit := convertCommit ( rev )
if entry . IsSubModule ( ) {
subModuleURL := ""
if subModule , err := state . headC ommit. GetSubModule ( entry . Name ( ) ) ; err != nil {
return nil , err
if subModule , err := commit . GetSubModule ( entry . Name ( ) ) ; err != nil {
return nil , nil , err
} else if subModule != nil {
subModuleURL = subModule . URL
}
subModuleFile := NewSubModuleFile ( c ommit, subModuleURL , entry . ID . String ( ) )
subModuleFile := NewSubModuleFile ( entryCommit , subModuleURL , entry . ID . String ( ) )
commitsInfo [ i ] = [ ] interface { } { entry , subModuleFile }
default :
commitsInfo [ i ] = [ ] interface { } { entry , commit }
} else {
commitsInfo [ i ] = [ ] interface { } { entry , entryCommit }
}
} else {
commitsInfo [ i ] = [ ] interface { } { entry , nil }
}
}
return commitsInfo , nil
// Retrieve the commit for the treePath itself (see above). We basically
// get it for free during the tree traversal and it's used for listing
// pages to display information about newest commit for a given path.
var treeCommit * Commit
if rev , ok := revs [ "" ] ; ok {
treeCommit = convertCommit ( rev )
}
return commitsInfo , treeCommit , nil
}
func ( state * getCommitsInfoState ) cleanEntryPath ( rawEntryPath string ) ( string , error ) {
if rawEntryPath [ 0 ] == '"' {
var err error
rawEntryPath , err = strconv . Unquote ( rawEntryPath )
type commitAndPaths struct {
commit * object . Commit
// Paths that are still on the branch represented by commit
paths [ ] string
// Set of hashes for the paths
hashes map [ string ] plumbing . Hash
}
func getCommitTree ( c * object . Commit , treePath string ) ( * object . Tree , error ) {
tree , err := c . Tree ( )
if err != nil {
return rawEntryPath , err
return nil , err
}
// Optimize deep traversals by focusing only on the specific tree
if treePath != "" {
tree , err = tree . Tree ( treePath )
if err != nil {
return nil , err
}
var entryNameStartIndex int
if len ( state . treePath ) > 0 {
entryNameStartIndex = len ( state . treePath ) + 1
}
if index := strings . IndexByte ( rawEntryPath [ entryNameStartIndex : ] , '/' ) ; index >= 0 {
return rawEntryPath [ : entryNameStartIndex + index ] , nil
}
return rawEntryPath , nil
return tree , nil
}
// update report that the given path was last modified by the given commit.
// Returns whether state.commits was updated
func ( state * getCommitsInfoState ) update ( entryPath string , commit * Commit ) bool {
if _ , ok := state . entryPaths [ entryPath ] ; ! ok {
return false
func getFullPath ( treePath , path string ) string {
if treePath != "" {
if path != "" {
return treePath + "/" + path
}
var updated bool
state . lock . Lock ( )
defer state . lock . Unlock ( )
if _ , ok := state . commits [ entryPath ] ; ! ok {
state . commits [ entryPath ] = commit
updated = true
}
return updated
return treePath
}
return path
}
const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
func getFileHashes ( c * object . Commit , treePath string , paths [ ] string ) ( map [ string ] plumbing . Hash , error ) {
tree , err := getCommitTree ( c , treePath )
if err == object . ErrDirectoryNotFound {
// The whole tree didn't exist, so return empty map
return make ( map [ string ] plumbing . Hash ) , nil
}
if err != nil {
return nil , err
}
func getCommitsInfo ( state * getCommitsInfoState , cache LastCommitCache ) error {
ctx , cancel := context . WithTimeout ( context . Background ( ) , 1 * time . Minute )
defer cancel ( )
hashes := make ( map [ string ] plumbing . Hash )
for _ , path := range paths {
if path != "" {
entry , err := tree . FindEntry ( path )
if err == nil {
hashes [ path ] = entry . Hash
}
} else {
hashes [ path ] = tree . Hash
}
}
args := [ ] string { "log" , state . headCommit . ID . String ( ) , getCommitsInfoPretty , "--name-status" , "-c" }
if len ( state . treePath ) > 0 {
args = append ( args , "--" , state . treePath )
return hashes , nil
}
func getLastCommitForPaths ( c * object . Commit , treePath string , paths [ ] string ) ( map [ string ] * object . Commit , error ) {
// We do a tree traversal with nodes sorted by commit time
seen := make ( map [ plumbing . Hash ] bool )
heap := binaryheap . NewWith ( func ( a , b interface { } ) int {
if a . ( * commitAndPaths ) . commit . Committer . When . Before ( b . ( * commitAndPaths ) . commit . Committer . When ) {
return 1
}
cmd := exec . CommandContext ( ctx , "git" , args ... )
cmd . Dir = state . headCommit . repo . Path
return - 1
} )
readCloser , err := cmd . StdoutPipe ( )
result := make ( map [ string ] * object . Commit )
initialHashes , err := getFileHashes ( c , treePath , paths )
if err != nil {
return err
return nil , err
}
if err := cmd . Start ( ) ; err != nil {
return err
}
// it's okay to ignore the error returned by cmd.Wait(); we expect the
// subprocess to sometimes have a non-zero exit status, since we may
// prematurely close stdout, resulting in a broken pipe.
defer cmd . Wait ( )
// Start search from the root commit and with full set of paths
heap . Push ( & commitAndPaths { c , paths , initialHashes } )
numThreads := runtime . NumCPU ( )
done := make ( chan error , numThreads )
for i := 0 ; i < numThreads ; i ++ {
go targetedSearch ( state , done , cache )
for {
cIn , ok := heap . Pop ( )
if ! ok {
break
}
current := cIn . ( * commitAndPaths )
currentID := current . commit . ID ( )
scanner := bufio . NewScanner ( readCloser )
err = state . processGitLogOutput ( scanner )
// it is important that we close stdout here; if we do not close
// stdout, the subprocess will keep running, and the deffered call
// cmd.Wait() may block for a long time.
if closeErr := readCloser . Close ( ) ; closeErr != nil && err == nil {
err = closeErr
if seen [ currentID ] {
continue
}
seen [ currentID ] = true
for i := 0 ; i < numThreads ; i ++ {
doneErr := <- done
if doneErr != nil && err == nil {
err = doneErr
// Load the parent commits for the one we are currently examining
numParents := current . commit . NumParents ( )
var parents [ ] * object . Commit
for i := 0 ; i < numParents ; i ++ {
parent , err := current . commit . Parent ( i )
if err != nil {
break
}
parents = append ( parents , parent )
}
return err
}
func ( state * getCommitsInfoState ) processGitLogOutput ( scanner * bufio . Scanner ) error {
// keep a local cache of seen paths to avoid acquiring a lock for paths
// we've already seen
seenPaths := make ( map [ string ] struct { } , len ( state . entryPaths ) )
// number of consecutive commits without any finds
coldStreak := 0
var commit * Commit
var err error
for scanner . Scan ( ) {
line := scanner . Text ( )
if len ( line ) == 0 { // in-between commits
numRemainingEntries := state . numRemainingEntries ( )
if numRemainingEntries == 0 {
// Examine the current commit and set of interesting paths
numOfParentsWithPath := make ( [ ] int , len ( current . paths ) )
pathChanged := make ( [ ] bool , len ( current . paths ) )
parentHashes := make ( [ ] map [ string ] plumbing . Hash , len ( parents ) )
for j , parent := range parents {
parentHashes [ j ] , err = getFileHashes ( parent , treePath , current . paths )
if err != nil {
break
}
if coldStreak >= deferToTargetedSearchColdStreak &&
numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
// stop this untargeted search, and let the targeted-search threads
// finish the work
break
for i , path := range current . paths {
if parentHashes [ j ] [ path ] != plumbing . ZeroHash {
numOfParentsWithPath [ i ] ++
if parentHashes [ j ] [ path ] != current . hashes [ path ] {
pathChanged [ i ] = true
}
continue
}
if line [ 0 ] >= 'A' && line [ 0 ] <= 'X' { // a file was changed by the current commit
// look for the last tab, since for copies (C) and renames (R) two
// filenames are printed: src, then dest
tabIndex := strings . LastIndexByte ( line , '\t' )
if tabIndex < 1 {
return fmt . Errorf ( "misformatted line: %s" , line )
}
entryPath , err := state . cleanEntryPath ( line [ tabIndex + 1 : ] )
if err != nil {
return err
}
if _ , ok := seenPaths [ entryPath ] ; ! ok {
if state . update ( entryPath , commit ) {
coldStreak = 0
var remainingPaths [ ] string
for i , path := range current . paths {
switch numOfParentsWithPath [ i ] {
case 0 :
// The path didn't exist in any parent, so it must have been created by
// this commit. The results could already contain some newer change from
// different path, so don't override that.
if result [ path ] == nil {
result [ path ] = current . commit
}
case 1 :
// The file is present on exactly one parent, so check if it was changed
// and save the revision if it did.
if pathChanged [ i ] {
if result [ path ] == nil {
result [ path ] = current . commit
}
seenPaths [ entryPath ] = struct { } { }
} else {
remainingPaths = append ( remainingPaths , path )
}
default :
// The file is present on more than one of the parent paths, so this is
// a merge. We have to examine all the parent trees to find out where
// the change occurred. pathChanged[i] would tell us that the file was
// changed during the merge, but it wouldn't tell us the relevant commit
// that introduced it.
remainingPaths = append ( remainingPaths , path )
}
}
if len ( remainingPaths ) > 0 {
// Add the parent nodes along with remaining paths to the heap for further
// processing.
for j , parent := range parents {
if seen [ parent . ID ( ) ] {
continue
}
// a new commit
commit , err = parseCommitInfo ( line )
if err != nil {
return err
// Combine remainingPath with paths available on the parent branch
// and make union of them
var remainingPathsForParent [ ] string
for _ , path := range remainingPaths {
if parentHashes [ j ] [ path ] != plumbing . ZeroHash {
remainingPathsForParent = append ( remainingPathsForParent , path )
}
coldStreak ++
}
return scanner . Err ( )
}
// parseCommitInfo parse a commit from a line of `git log` output. Expects the
// line to be formatted according to getCommitsInfoPretty.
func parseCommitInfo ( line string ) ( * Commit , error ) {
if len ( line ) < 43 {
return nil , fmt . Errorf ( "invalid git output: %s" , line )
}
ref , err := NewIDFromString ( line [ : 40 ] )
if err != nil {
return nil , err
heap . Push ( & commitAndPaths { parent , remainingPathsForParent , parentHashes [ j ] } )
}
spaceIndex := strings . IndexByte ( line [ 41 : ] , ' ' )
if spaceIndex < 0 {
return nil , fmt . Errorf ( "invalid git output: %s" , line )
}
unixSeconds , err := strconv . Atoi ( line [ 41 : 41 + spaceIndex ] )
if err != nil {
return nil , err
}
message := line [ spaceIndex + 42 : ]
return & Commit {
ID : ref ,
CommitMessage : message ,
Committer : & Signature {
When : time . Unix ( int64 ( unixSeconds ) , 0 ) ,
} ,
} , nil
return result , nil
}