@ -21,10 +21,12 @@ type TableDiffCellType uint8
// TableDiffCellType possible values.
// TableDiffCellType possible values.
const (
const (
// The cell has the same value on both sides.
TableDiffCellEqual TableDiffCellType = iota + 1
TableDiffCellUnchanged TableDiffCellType = iota + 1
// The cell value differs between the two sides.
TableDiffCellChanged
TableDiffCellChanged
// The cell was added: only the head (right) side has a value.
TableDiffCellAdd
TableDiffCellAdd
// The cell was deleted: only the base (left) side has a value.
TableDiffCellDel
TableDiffCellDel
// The column sits at a different position and the cell value is the same.
TableDiffCellMovedUnchanged
// The column sits at a different position and the cell value differs.
TableDiffCellMovedChanged
)
)
// TableDiffCell represents a cell of a TableDiffRow
// TableDiffCell represents a cell of a TableDiffRow
@ -53,6 +55,9 @@ type csvReader struct {
eof bool
eof bool
}
}
// ErrorUndefinedCell is returned when the requested column does not exist in the given CSV row.
var ErrorUndefinedCell = errors . New ( "undefined cell" )
// createCsvReader creates a csvReader and fills the buffer
// createCsvReader creates a csvReader and fills the buffer
func createCsvReader ( reader * csv . Reader , bufferRowCount int ) ( * csvReader , error ) {
func createCsvReader ( reader * csv . Reader , bufferRowCount int ) ( * csvReader , error ) {
csv := & csvReader { reader : reader }
csv := & csvReader { reader : reader }
@ -70,7 +75,7 @@ func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error)
// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
func ( csv * csvReader ) GetRow ( row int ) ( [ ] string , error ) {
func ( csv * csvReader ) GetRow ( row int ) ( [ ] string , error ) {
if row < len ( csv . buffer ) {
if row < len ( csv . buffer ) && row >= 0 {
return csv . buffer [ row ] , nil
return csv . buffer [ row ] , nil
}
}
if csv . eof {
if csv . eof {
@ -131,7 +136,11 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab
}
}
cells := make ( [ ] * TableDiffCell , len ( row ) )
cells := make ( [ ] * TableDiffCell , len ( row ) )
for j := 0 ; j < len ( row ) ; j ++ {
for j := 0 ; j < len ( row ) ; j ++ {
cells [ j ] = & TableDiffCell { LeftCell : row [ j ] , Type : celltype }
if celltype == TableDiffCellDel {
cells [ j ] = & TableDiffCell { LeftCell : row [ j ] , Type : celltype }
} else {
cells [ j ] = & TableDiffCell { RightCell : row [ j ] , Type : celltype }
}
}
}
rows = append ( rows , & TableDiffRow { RowIdx : i , Cells : cells } )
rows = append ( rows , & TableDiffRow { RowIdx : i , Cells : cells } )
i ++
i ++
@ -141,185 +150,267 @@ func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*Tab
}
}
// createCsvDiff builds the table diff sections for diffFile, reading the row contents from baseReader and headReader.
// It returns one TableDiffSection per diff section that produced at least one row, or the first error hit while reading.
func createCsvDiff ( diffFile * DiffFile , baseReader * csv . Reader , headReader * csv . Reader ) ( [ ] * TableDiffSection , error ) {
func createCsvDiff ( diffFile * DiffFile , baseReader * csv . Reader , headReader * csv . Reader ) ( [ ] * TableDiffSection , error ) {
a , err := createCsvReader ( baseReader , maxRowsToInspect )
// Given the baseReader and headReader, we are going to create a csvReader for each: baseCSVReader and headCSVReader respectively
baseCSVReader , err := createCsvReader ( baseReader , maxRowsToInspect )
if err != nil {
if err != nil {
return nil , err
return nil , err
}
}
headCSVReader , err := createCsvReader ( headReader , maxRowsToInspect )
b , err := createCsvReader ( headReader , maxRowsToInspect )
if err != nil {
if err != nil {
return nil , err
return nil , err
}
}
a2b , b2a := getColumnMapping ( a , b )
// Initialize the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
a2bColMap , b2aColMap := getColumnMapping ( baseCSVReader , headCSVReader )
columns := len ( a2b ) + countUnmappedColumns ( b2a )
// Determine how many columns the diff table will have: every column of the wider side plus the unmapped columns of the other side
if len ( a2b ) < len ( b2a ) {
numDiffTableCols := len ( a2bColMap ) + countUnmappedColumns ( b2aColMap )
columns = len ( b2a ) + countUnmappedColumns ( a2b )
if len ( a2bColMap ) < len ( b2aColMap ) {
numDiffTableCols = len ( b2aColMap ) + countUnmappedColumns ( a2bColMap )
}
}
createDiffRow := func ( aline int , bline int ) ( * TableDiffRow , error ) {
// createDiffTableRow takes the 1-based row numbers of the `a` (base) line and `b` (head) line of a diff; 0 means the line doesn't exist (undefined)
cells := make ( [ ] * TableDiffCell , columns )
// in the base or head respectively.
// Returns a TableDiffRow whose RowIdx is bLineNum, or nil when neither line number is set.
if aline == 0 || bline == 0 {
createDiffTableRow := func ( aLineNum int , bLineNum int ) ( * TableDiffRow , error ) {
var (
// diffTableCells is a row of the diff table. It will have cells for added, deleted, changed, and unchanged content, thus either
row [ ] string
// the same size as the head table or bigger
celltype TableDiffCellType
diffTableCells := make ( [ ] * TableDiffCell , numDiffTableCols )
err error
var bRow * [ ] string
)
if bLineNum > 0 {
if bline == 0 {
row , err := headCSVReader . GetRow ( bLineNum - 1 )
row , err = a . GetRow ( aline - 1 )
celltype = TableDiffCellDel
} else {
row , err = b . GetRow ( bline - 1 )
celltype = TableDiffCellAdd
}
if err != nil {
if err != nil {
return nil , err
return nil , err
}
}
if row == nil {
bRow = & row
return nil , nil
}
for i := 0 ; i < len ( row ) ; i ++ {
cells [ i ] = & TableDiffCell { LeftCell : row [ i ] , Type : celltype }
}
return & TableDiffRow { RowIdx : bline , Cells : cells } , nil
}
}
var aRow * [ ] string
arow , err := a . GetRow ( aline - 1 )
if aLineNum > 0 {
if err != nil {
row , err := baseCSVReader . GetRow ( aLineNum - 1 )
return nil , err
if err != nil {
}
return nil , err
brow , err := b . GetRow ( bline - 1 )
}
if err != nil {
aRow = & row
return nil , err
}
}
if len ( arow ) == 0 && len ( brow ) == 0 {
if aRow == nil && bRow == nil {
// No content
return nil , nil
return nil , nil
}
}
for i := 0 ; i < len ( a2b ) ; i ++ {
aIndex := 0 // tracks where we are in the a2bColMap
acell , _ := getCell ( arow , i )
bIndex := 0 // tracks where we are in the b2aColMap
if a2b [ i ] == unmappedColumn {
colsAdded := 0 // incremented whenever we found a column was added
cells [ i ] = & TableDiffCell { LeftCell : acell , Type : TableDiffCellDel }
colsDeleted := 0 // incremented whenever a column was deleted
} else {
bcell , _ := getCell ( brow , a2b [ i ] )
// We loop until both aIndex and bIndex have reached the end of their column maps, at which point we are done
for aIndex < len ( a2bColMap ) || bIndex < len ( b2aColMap ) {
celltype := TableDiffCellChanged
// Starting from where aIndex is currently pointing, we see if the map is -1 (deleted) and if it is, create a column to note that, increment, and look at the next aIndex
if acell == bcell {
for aIndex < len ( a2bColMap ) && a2bColMap [ aIndex ] == - 1 && ( bIndex >= len ( b2aColMap ) || aIndex <= bIndex ) {
celltype = TableDiffCellEqual
var aCell string
if aRow != nil {
if cell , err := getCell ( * aRow , aIndex ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
aCell = cell
}
}
}
diffTableCells [ bIndex + colsDeleted ] = & TableDiffCell { LeftCell : aCell , Type : TableDiffCellDel }
aIndex ++
colsDeleted ++
}
cells [ i ] = & TableDiffCell { LeftCell : acell , RightCell : bcell , Type : celltype }
// aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
// we can just increment aIndex until it points to a -1 (deleted) column or reaches the end of a2bColMap
for aIndex < len ( a2bColMap ) && a2bColMap [ aIndex ] != - 1 {
aIndex ++
}
}
}
for i := 0 ; i < len ( b2a ) ; i ++ {
// Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if it is, create a column to note that, increment, and look at the next bIndex
if b2a [ i ] == unmappedColumn {
for bIndex < len ( b2aColMap ) && b2aColMap [ bIndex ] == - 1 && ( aIndex >= len ( a2bColMap ) || bIndex < aIndex ) {
bcell , _ := getCell ( brow , i )
var bCell string
cells [ i ] = & TableDiffCell { LeftCell : bcell , Type : TableDiffCellAdd }
cellType := TableDiffCellAdd
if bRow != nil {
if cell , err := getCell ( * bRow , bIndex ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
bCell = cell
}
} else {
cellType = TableDiffCellDel
}
diffTableCells [ bIndex + colsDeleted ] = & TableDiffCell { RightCell : bCell , Type : cellType }
bIndex ++
colsAdded ++
}
// bIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
// we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
for bIndex < len ( b2aColMap ) && b2aColMap [ bIndex ] != - 1 && ( aIndex >= len ( a2bColMap ) || bIndex < aIndex ) {
var diffTableCell TableDiffCell
var aCell * string
// get the aCell value if the aRow exists; an undefined cell is tolerated and leaves aCell nil
if aRow != nil {
if cell , err := getCell ( * aRow , b2aColMap [ bIndex ] ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
aCell = & cell
diffTableCell . LeftCell = cell
}
} else {
diffTableCell . Type = TableDiffCellAdd
}
var bCell * string
// get the bCell value if the bRow exists; an undefined cell is tolerated and leaves bCell nil
if bRow != nil {
if cell , err := getCell ( * bRow , bIndex ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
bCell = & cell
diffTableCell . RightCell = cell
}
} else {
diffTableCell . Type = TableDiffCellDel
}
// if both a and b have a cell that exists, compare the values and determine if the column has moved
if aCell != nil && bCell != nil {
moved := ( ( bIndex + colsDeleted ) != ( b2aColMap [ bIndex ] + colsAdded ) )
if * aCell != * bCell {
if moved {
diffTableCell . Type = TableDiffCellMovedChanged
} else {
diffTableCell . Type = TableDiffCellChanged
}
} else {
if moved {
diffTableCell . Type = TableDiffCellMovedUnchanged
} else {
diffTableCell . Type = TableDiffCellUnchanged
}
diffTableCell . LeftCell = ""
}
}
// Add the diff column to the diff row
diffTableCells [ bIndex + colsDeleted ] = & diffTableCell
bIndex ++
}
}
}
}
return & TableDiffRow { RowIdx : bline , Cells : cells } , nil
return & TableDiffRow { RowIdx : bLineNum , Cells : diffTableC ells} , nil
}
}
var sections [ ] * TableDiffSection
// diffTableSections holds one TableDiffSection per diff section that produced rows; each will be its own table in the view
var diffTableSections [ ] * TableDiffSection
for i , section := range diffFile . Sections {
for i , section := range diffFile . Sections {
var rows [ ] * TableDiffRow
// Each section has multiple diffTableRows
var diffTableRows [ ] * TableDiffRow
lines := tryMergeLines ( section . Lines )
lines := tryMergeLines ( section . Lines )
// Loop through the merged lines to get each row of the CSV diff table for this section
for j , line := range lines {
for j , line := range lines {
// When the very first line of the first section is not line 1 on both sides, row 1 is emitted first (presumably the CSV header row - confirm)
if i == 0 && j == 0 && ( line [ 0 ] != 1 || line [ 1 ] != 1 ) {
if i == 0 && j == 0 && ( line [ 0 ] != 1 || line [ 1 ] != 1 ) {
diffRow , err := createDiffRow ( 1 , 1 )
diffTable Row , err := createDiffTable Row ( 1 , 1 )
if err != nil {
if err != nil {
return nil , err
return nil , err
}
}
if diffRow != nil {
if diffTable Row != nil {
rows = append ( rows , diffRow )
diffTableR ows = append ( diffTableR ows, diffTable Row )
}
}
}
}
diffRow , err := createDiffRow ( line [ 0 ] , line [ 1 ] )
diffTable Row , err := createDiffTable Row ( line [ 0 ] , line [ 1 ] )
if err != nil {
if err != nil {
return nil , err
return nil , err
}
}
if diffRow != nil {
if diffTable Row != nil {
rows = append ( rows , diffRow )
diffTableR ows = append ( diffTableR ows, diffTable Row )
}
}
}
}
if len ( rows ) > 0 {
if len ( diffTableR ows) > 0 {
sections = append ( sections , & TableDiffSection { Rows : rows } )
diffTableS ections = append ( diffTableS ections, & TableDiffSection { Rows : diffTableR ows} )
}
}
}
}
return sections , nil
return diffTableS ections, nil
}
}
// getColumnMapping creates a mapping of columns between the base and head readers, using row 0 of each.
// It returns the base-to-head and head-to-base index slices; a column without a counterpart holds unmappedColumn.
func getColumnMapping ( a * csvReader , b * csvReader ) ( [ ] int , [ ] int ) {
func getColumnMapping ( b aseCSVReader * csvReader , headCSVReader * csvReader ) ( [ ] int , [ ] int ) {
arow , _ := a . GetRow ( 0 )
baseR ow, _ := b aseCSVReader . GetRow ( 0 )
brow , _ := b . GetRow ( 0 )
headR ow, _ := headCSVReader . GetRow ( 0 )
a2b := [ ] int { }
base2HeadColMap := [ ] int { }
b2a := [ ] int { }
head2BaseColMap := [ ] int { }
if arow != nil {
if baseR ow != nil {
a2b = make ( [ ] int , len ( arow ) )
base2HeadColMap = make ( [ ] int , len ( baseR ow) )
}
}
if brow != nil {
if headR ow != nil {
b2a = make ( [ ] int , len ( brow ) )
head2BaseColMap = make ( [ ] int , len ( headR ow) )
}
}
for i := 0 ; i < len ( b2a ) ; i ++ {
// Initializes all head2base mappings to be unmappedColumn (-1)
b2a [ i ] = unmappedColumn
for i := 0 ; i < len ( head2BaseColMap ) ; i ++ {
head2BaseColMap [ i ] = unmappedColumn
}
}
bcol := 0
// Loop through the baseRow and, for each cell, look for the first not-yet-mapped head column with the same value in row 0
for i := 0 ; i < len ( a2b ) ; i ++ {
for i := 0 ; i < len ( baseRow ) ; i ++ {
a2b [ i ] = unmappedColumn
base2HeadColMap [ i ] = unmappedColumn
baseCell , err := getCell ( baseRow , i )
acell , ea := getCell ( arow , i )
if err == nil {
if ea == nil {
for j := 0 ; j < len ( headRow ) ; j ++ {
for j := bcol ; j < len ( b2a ) ; j ++ {
if head2BaseColMap [ j ] == - 1 {
bcell , eb := getCell ( brow , j )
headCell , err := getCell ( headR ow, j )
if eb == nil && acell == bcell {
if err == nil && baseCell == headC ell {
a2b [ i ] = j
base2HeadColMap [ i ] = j
b2a [ j ] = i
head2BaseColMap [ j ] = i
bcol = j + 1
break
break
}
}
}
}
}
}
}
}
}
// Columns still unmapped after comparing row 0 get a second chance by comparing cell contents, in both directions
tryMapColumnsByContent ( a , a2b , b , b2a )
tryMapColumnsByContent ( baseCSVReader , base2HeadColMap , headCSVReader , head2BaseColMap )
tryMapColumnsByContent ( b , b2a , a , a2b )
tryMapColumnsByContent ( headCSVReader , head2BaseColMap , baseCSVReader , base2HeadColMap )
return a2b , b2a
return base2HeadColMap , head2BaseColMap
}
}
// tryMapColumnsByContent tries to map missing columns by the content of the first lines.
// Two unmapped columns are paired when the share of equal cells over the inspected buffered rows exceeds minRatioToMatch; both maps are updated in place.
func tryMapColumnsByContent ( a * csvReader , a2b [ ] int , b * csvReader , b2a [ ] int ) {
func tryMapColumnsByContent ( b aseCSVReader * csvReader , base2HeadColMap [ ] int , headCSVReader * csvReader , head2BaseColMap [ ] int ) {
start := 0
for i := 0 ; i < len ( base2HeadColMap ) ; i ++ {
for i := 0 ; i < len ( a2b ) ; i ++ {
headStart := 0
if a2b [ i ] == unmappedColumn {
for base2HeadColMap [ i ] == unmappedColumn && headStart < len ( head2BaseColMap ) {
if b2a [ s tart] == unmappedColumn {
if head2BaseColMap [ headS tart] == unmappedColumn {
// Inspect at most maxRowsToInspect rows that are buffered on both sides; row 0 is not counted or compared
rows := util . Min ( maxRowsToInspect , util . Max ( 0 , util . Min ( len ( a . buffer ) , len ( b . buffer ) ) - 1 ) )
rows := util . Min ( maxRowsToInspect , util . Max ( 0 , util . Min ( len ( b aseCSVReader . buffer ) , len ( headCSVReader . buffer ) ) - 1 ) )
same := 0
same := 0
for j := 1 ; j <= rows ; j ++ {
for j := 1 ; j <= rows ; j ++ {
acell , ea := getCell ( a . buffer [ j ] , i )
baseCell , baseErr := getCell ( b aseCSVReader . buffer [ j ] , i )
bcell , eb := getCell ( b . buffer [ j ] , start + 1 )
headCell , headErr := getCell ( headCSVReader . buffer [ j ] , headStart )
if ea == nil && eb == nil && acell == bc ell {
if baseErr == nil && headErr == nil && baseCell == headC ell {
same ++
same ++
}
}
}
}
// Map the pair in both directions once enough of the inspected cells are equal
if ( float32 ( same ) / float32 ( rows ) ) > minRatioToMatch {
if ( float32 ( same ) / float32 ( rows ) ) > minRatioToMatch {
a2b [ i ] = start + 1
base2HeadColMap [ i ] = headStart
b2a [ start + 1 ] = i
head2BaseColMap [ headStart ] = i
}
}
}
}
headStart ++
}
}
start = a2b [ i ]
}
}
}
}
@ -328,7 +419,7 @@ func getCell(row []string, column int) (string, error) {
if column < len ( row ) {
if column < len ( row ) {
return row [ column ] , nil
return row [ column ] , nil
}
}
return "" , errors . New ( "Undefined column" )
return "" , ErrorUndefinedCell
}
}
// countUnmappedColumns returns the count of unmapped columns.
// countUnmappedColumns returns the count of unmapped columns.