|
|
|
@ -7,6 +7,7 @@ package cmp
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"fmt"
|
|
|
|
|
"math"
|
|
|
|
|
"reflect"
|
|
|
|
|
"strconv"
|
|
|
|
|
"strings"
|
|
|
|
@ -26,8 +27,6 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
|
|
|
|
|
return false // No differences detected
|
|
|
|
|
case !v.ValueX.IsValid() || !v.ValueY.IsValid():
|
|
|
|
|
return false // Both values must be valid
|
|
|
|
|
case v.Type.Kind() == reflect.Slice && (v.ValueX.Len() == 0 || v.ValueY.Len() == 0):
|
|
|
|
|
return false // Both slice values have to be non-empty
|
|
|
|
|
case v.NumIgnored > 0:
|
|
|
|
|
return false // Some ignore option was used
|
|
|
|
|
case v.NumTransformed > 0:
|
|
|
|
@ -45,7 +44,16 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch t := v.Type; t.Kind() {
|
|
|
|
|
// Check whether this is an interface with the same concrete types.
|
|
|
|
|
t := v.Type
|
|
|
|
|
vx, vy := v.ValueX, v.ValueY
|
|
|
|
|
if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {
|
|
|
|
|
vx, vy = vx.Elem(), vy.Elem()
|
|
|
|
|
t = vx.Type()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check whether we provide specialized diffing for this type.
|
|
|
|
|
switch t.Kind() {
|
|
|
|
|
case reflect.String:
|
|
|
|
|
case reflect.Array, reflect.Slice:
|
|
|
|
|
// Only slices of primitive types have specialized handling.
|
|
|
|
@ -57,6 +65,11 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Both slice values have to be non-empty.
|
|
|
|
|
if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If a sufficient number of elements already differ,
|
|
|
|
|
// use specialized formatting even if length requirement is not met.
|
|
|
|
|
if v.NumDiff > v.NumSame {
|
|
|
|
@ -68,7 +81,7 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
|
|
|
|
|
|
|
|
|
|
// Use specialized string diffing for longer slices or strings.
|
|
|
|
|
const minLength = 64
|
|
|
|
|
return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
|
|
|
|
|
return vx.Len() >= minLength && vy.Len() >= minLength
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
|
|
|
|
@ -77,17 +90,23 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
|
|
|
|
|
func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
assert(opts.DiffMode == diffUnknown)
|
|
|
|
|
t, vx, vy := v.Type, v.ValueX, v.ValueY
|
|
|
|
|
if t.Kind() == reflect.Interface {
|
|
|
|
|
vx, vy = vx.Elem(), vy.Elem()
|
|
|
|
|
t = vx.Type()
|
|
|
|
|
opts = opts.WithTypeMode(emitType)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Auto-detect the type of the data.
|
|
|
|
|
var isLinedText, isText, isBinary bool
|
|
|
|
|
var sx, sy string
|
|
|
|
|
var ssx, ssy []string
|
|
|
|
|
var isString, isMostlyText, isPureLinedText, isBinary bool
|
|
|
|
|
switch {
|
|
|
|
|
case t.Kind() == reflect.String:
|
|
|
|
|
sx, sy = vx.String(), vy.String()
|
|
|
|
|
isText = true // Initial estimate, verify later
|
|
|
|
|
isString = true
|
|
|
|
|
case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
|
|
|
|
|
sx, sy = string(vx.Bytes()), string(vy.Bytes())
|
|
|
|
|
isBinary = true // Initial estimate, verify later
|
|
|
|
|
isString = true
|
|
|
|
|
case t.Kind() == reflect.Array:
|
|
|
|
|
// Arrays need to be addressable for slice operations to work.
|
|
|
|
|
vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
|
|
|
|
@ -95,13 +114,12 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
vy2.Set(vy)
|
|
|
|
|
vx, vy = vx2, vy2
|
|
|
|
|
}
|
|
|
|
|
if isText || isBinary {
|
|
|
|
|
var numLines, lastLineIdx, maxLineLen int
|
|
|
|
|
isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
|
|
|
|
|
if isString {
|
|
|
|
|
var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int
|
|
|
|
|
for i, r := range sx + sy {
|
|
|
|
|
if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
|
|
|
|
|
isBinary = true
|
|
|
|
|
break
|
|
|
|
|
numTotalRunes++
|
|
|
|
|
if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError {
|
|
|
|
|
numValidRunes++
|
|
|
|
|
}
|
|
|
|
|
if r == '\n' {
|
|
|
|
|
if maxLineLen < i-lastLineIdx {
|
|
|
|
@ -111,8 +129,26 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
numLines++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
isText = !isBinary
|
|
|
|
|
isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
|
|
|
|
|
isPureText := numValidRunes == numTotalRunes
|
|
|
|
|
isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))
|
|
|
|
|
isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
|
|
|
|
|
isBinary = !isMostlyText
|
|
|
|
|
|
|
|
|
|
// Avoid diffing by lines if it produces a significantly more complex
|
|
|
|
|
// edit script than diffing by bytes.
|
|
|
|
|
if isPureLinedText {
|
|
|
|
|
ssx = strings.Split(sx, "\n")
|
|
|
|
|
ssy = strings.Split(sy, "\n")
|
|
|
|
|
esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
|
|
|
|
|
return diff.BoolResult(ssx[ix] == ssy[iy])
|
|
|
|
|
})
|
|
|
|
|
esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
|
|
|
|
|
return diff.BoolResult(sx[ix] == sy[iy])
|
|
|
|
|
})
|
|
|
|
|
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
|
|
|
|
|
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
|
|
|
|
|
isPureLinedText = efficiencyLines < 4*efficiencyBytes
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Format the string into printable records.
|
|
|
|
@ -121,9 +157,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
switch {
|
|
|
|
|
// If the text appears to be multi-lined text,
|
|
|
|
|
// then perform differencing across individual lines.
|
|
|
|
|
case isLinedText:
|
|
|
|
|
ssx := strings.Split(sx, "\n")
|
|
|
|
|
ssy := strings.Split(sy, "\n")
|
|
|
|
|
case isPureLinedText:
|
|
|
|
|
list = opts.formatDiffSlice(
|
|
|
|
|
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
|
|
|
|
|
func(v reflect.Value, d diffMode) textRecord {
|
|
|
|
@ -212,7 +246,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
// If the text appears to be single-lined text,
|
|
|
|
|
// then perform differencing in approximately fixed-sized chunks.
|
|
|
|
|
// The output is printed as quoted strings.
|
|
|
|
|
case isText:
|
|
|
|
|
case isMostlyText:
|
|
|
|
|
list = opts.formatDiffSlice(
|
|
|
|
|
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
|
|
|
|
|
func(v reflect.Value, d diffMode) textRecord {
|
|
|
|
@ -220,7 +254,6 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
return textRecord{Diff: d, Value: textLine(s)}
|
|
|
|
|
},
|
|
|
|
|
)
|
|
|
|
|
delim = ""
|
|
|
|
|
|
|
|
|
|
// If the text appears to be binary data,
|
|
|
|
|
// then perform differencing in approximately fixed-sized chunks.
|
|
|
|
@ -282,7 +315,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
|
|
|
|
|
|
|
|
// Wrap the output with appropriate type information.
|
|
|
|
|
var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
|
|
|
|
|
if !isText {
|
|
|
|
|
if !isMostlyText {
|
|
|
|
|
// The "{...}" byte-sequence literal is not valid Go syntax for strings.
|
|
|
|
|
// Emit the type for extra clarity (e.g. "string{...}").
|
|
|
|
|
if t.Kind() == reflect.String {
|
|
|
|
@ -321,8 +354,11 @@ func (opts formatOptions) formatDiffSlice(
|
|
|
|
|
vx, vy reflect.Value, chunkSize int, name string,
|
|
|
|
|
makeRec func(reflect.Value, diffMode) textRecord,
|
|
|
|
|
) (list textList) {
|
|
|
|
|
es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
|
|
|
|
|
return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
|
|
|
|
|
eq := func(ix, iy int) bool {
|
|
|
|
|
return vx.Index(ix).Interface() == vy.Index(iy).Interface()
|
|
|
|
|
}
|
|
|
|
|
es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
|
|
|
|
|
return diff.BoolResult(eq(ix, iy))
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
appendChunks := func(v reflect.Value, d diffMode) int {
|
|
|
|
@ -347,6 +383,7 @@ func (opts formatOptions) formatDiffSlice(
|
|
|
|
|
|
|
|
|
|
groups := coalesceAdjacentEdits(name, es)
|
|
|
|
|
groups = coalesceInterveningIdentical(groups, chunkSize/4)
|
|
|
|
|
groups = cleanupSurroundingIdentical(groups, eq)
|
|
|
|
|
maxGroup := diffStats{Name: name}
|
|
|
|
|
for i, ds := range groups {
|
|
|
|
|
if maxLen >= 0 && numDiffs >= maxLen {
|
|
|
|
@ -399,25 +436,36 @@ func (opts formatOptions) formatDiffSlice(
|
|
|
|
|
|
|
|
|
|
// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
|
|
|
|
|
// equal or unequal counts.
|
|
|
|
|
//
|
|
|
|
|
// Example:
|
|
|
|
|
//
|
|
|
|
|
// Input: "..XXY...Y"
|
|
|
|
|
// Output: [
|
|
|
|
|
// {NumIdentical: 2},
|
|
|
|
|
// {NumRemoved: 2, NumInserted 1},
|
|
|
|
|
// {NumIdentical: 3},
|
|
|
|
|
// {NumInserted: 1},
|
|
|
|
|
// ]
|
|
|
|
|
//
|
|
|
|
|
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
|
|
|
|
|
var prevCase int // Arbitrary index into which case last occurred
|
|
|
|
|
lastStats := func(i int) *diffStats {
|
|
|
|
|
if prevCase != i {
|
|
|
|
|
var prevMode byte
|
|
|
|
|
lastStats := func(mode byte) *diffStats {
|
|
|
|
|
if prevMode != mode {
|
|
|
|
|
groups = append(groups, diffStats{Name: name})
|
|
|
|
|
prevCase = i
|
|
|
|
|
prevMode = mode
|
|
|
|
|
}
|
|
|
|
|
return &groups[len(groups)-1]
|
|
|
|
|
}
|
|
|
|
|
for _, e := range es {
|
|
|
|
|
switch e {
|
|
|
|
|
case diff.Identity:
|
|
|
|
|
lastStats(1).NumIdentical++
|
|
|
|
|
lastStats('=').NumIdentical++
|
|
|
|
|
case diff.UniqueX:
|
|
|
|
|
lastStats(2).NumRemoved++
|
|
|
|
|
lastStats('!').NumRemoved++
|
|
|
|
|
case diff.UniqueY:
|
|
|
|
|
lastStats(2).NumInserted++
|
|
|
|
|
lastStats('!').NumInserted++
|
|
|
|
|
case diff.Modified:
|
|
|
|
|
lastStats(2).NumModified++
|
|
|
|
|
lastStats('!').NumModified++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return groups
|
|
|
|
@ -427,6 +475,35 @@ func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats)
|
|
|
|
|
// equal groups into adjacent unequal groups that currently result in a
|
|
|
|
|
// dual inserted/removed printout. This acts as a high-pass filter to smooth
|
|
|
|
|
// out high-frequency changes within the windowSize.
|
|
|
|
|
//
|
|
|
|
|
// Example:
|
|
|
|
|
//
|
|
|
|
|
// WindowSize: 16,
|
|
|
|
|
// Input: [
|
|
|
|
|
// {NumIdentical: 61}, // group 0
|
|
|
|
|
// {NumRemoved: 3, NumInserted: 1}, // group 1
|
|
|
|
|
// {NumIdentical: 6}, // ├── coalesce
|
|
|
|
|
// {NumInserted: 2}, // ├── coalesce
|
|
|
|
|
// {NumIdentical: 1}, // ├── coalesce
|
|
|
|
|
// {NumRemoved: 9}, // └── coalesce
|
|
|
|
|
// {NumIdentical: 64}, // group 2
|
|
|
|
|
// {NumRemoved: 3, NumInserted: 1}, // group 3
|
|
|
|
|
// {NumIdentical: 6}, // ├── coalesce
|
|
|
|
|
// {NumInserted: 2}, // ├── coalesce
|
|
|
|
|
// {NumIdentical: 1}, // ├── coalesce
|
|
|
|
|
// {NumRemoved: 7}, // ├── coalesce
|
|
|
|
|
// {NumIdentical: 1}, // ├── coalesce
|
|
|
|
|
// {NumRemoved: 2}, // └── coalesce
|
|
|
|
|
// {NumIdentical: 63}, // group 4
|
|
|
|
|
// ]
|
|
|
|
|
// Output: [
|
|
|
|
|
// {NumIdentical: 61},
|
|
|
|
|
// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3},
|
|
|
|
|
// {NumIdentical: 64},
|
|
|
|
|
// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3},
|
|
|
|
|
// {NumIdentical: 63},
|
|
|
|
|
// ]
|
|
|
|
|
//
|
|
|
|
|
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
|
|
|
|
|
groups, groupsOrig := groups[:0], groups
|
|
|
|
|
for i, ds := range groupsOrig {
|
|
|
|
@ -446,3 +523,91 @@ func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStat
|
|
|
|
|
}
|
|
|
|
|
return groups
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// cleanupSurroundingIdentical scans through all unequal groups, and
|
|
|
|
|
// moves any leading sequence of equal elements to the preceding equal group and
|
|
|
|
|
// moves and trailing sequence of equal elements to the succeeding equal group.
|
|
|
|
|
//
|
|
|
|
|
// This is necessary since coalesceInterveningIdentical may coalesce edit groups
|
|
|
|
|
// together such that leading/trailing spans of equal elements becomes possible.
|
|
|
|
|
// Note that this can occur even with an optimal diffing algorithm.
|
|
|
|
|
//
|
|
|
|
|
// Example:
|
|
|
|
|
//
|
|
|
|
|
// Input: [
|
|
|
|
|
// {NumIdentical: 61},
|
|
|
|
|
// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements
|
|
|
|
|
// {NumIdentical: 67},
|
|
|
|
|
// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements
|
|
|
|
|
// {NumIdentical: 54},
|
|
|
|
|
// ]
|
|
|
|
|
// Output: [
|
|
|
|
|
// {NumIdentical: 64}, // incremented by 3
|
|
|
|
|
// {NumRemoved: 9},
|
|
|
|
|
// {NumIdentical: 67},
|
|
|
|
|
// {NumRemoved: 9},
|
|
|
|
|
// {NumIdentical: 64}, // incremented by 10
|
|
|
|
|
// ]
|
|
|
|
|
//
|
|
|
|
|
func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {
|
|
|
|
|
var ix, iy int // indexes into sequence x and y
|
|
|
|
|
for i, ds := range groups {
|
|
|
|
|
// Handle equal group.
|
|
|
|
|
if ds.NumDiff() == 0 {
|
|
|
|
|
ix += ds.NumIdentical
|
|
|
|
|
iy += ds.NumIdentical
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Handle unequal group.
|
|
|
|
|
nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified
|
|
|
|
|
ny := ds.NumIdentical + ds.NumInserted + ds.NumModified
|
|
|
|
|
var numLeadingIdentical, numTrailingIdentical int
|
|
|
|
|
for i := 0; i < nx && i < ny && eq(ix+i, iy+i); i++ {
|
|
|
|
|
numLeadingIdentical++
|
|
|
|
|
}
|
|
|
|
|
for i := 0; i < nx && i < ny && eq(ix+nx-1-i, iy+ny-1-i); i++ {
|
|
|
|
|
numTrailingIdentical++
|
|
|
|
|
}
|
|
|
|
|
if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {
|
|
|
|
|
if numLeadingIdentical > 0 {
|
|
|
|
|
// Remove leading identical span from this group and
|
|
|
|
|
// insert it into the preceding group.
|
|
|
|
|
if i-1 >= 0 {
|
|
|
|
|
groups[i-1].NumIdentical += numLeadingIdentical
|
|
|
|
|
} else {
|
|
|
|
|
// No preceding group exists, so prepend a new group,
|
|
|
|
|
// but do so after we finish iterating over all groups.
|
|
|
|
|
defer func() {
|
|
|
|
|
groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)
|
|
|
|
|
}()
|
|
|
|
|
}
|
|
|
|
|
// Increment indexes since the preceding group would have handled this.
|
|
|
|
|
ix += numLeadingIdentical
|
|
|
|
|
iy += numLeadingIdentical
|
|
|
|
|
}
|
|
|
|
|
if numTrailingIdentical > 0 {
|
|
|
|
|
// Remove trailing identical span from this group and
|
|
|
|
|
// insert it into the succeeding group.
|
|
|
|
|
if i+1 < len(groups) {
|
|
|
|
|
groups[i+1].NumIdentical += numTrailingIdentical
|
|
|
|
|
} else {
|
|
|
|
|
// No succeeding group exists, so append a new group,
|
|
|
|
|
// but do so after we finish iterating over all groups.
|
|
|
|
|
defer func() {
|
|
|
|
|
groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})
|
|
|
|
|
}()
|
|
|
|
|
}
|
|
|
|
|
// Do not increment indexes since the succeeding group will handle this.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update this group since some identical elements were removed.
|
|
|
|
|
nx -= numIdentical
|
|
|
|
|
ny -= numIdentical
|
|
|
|
|
groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}
|
|
|
|
|
}
|
|
|
|
|
ix += nx
|
|
|
|
|
iy += ny
|
|
|
|
|
}
|
|
|
|
|
return groups
|
|
|
|
|
}
|
|
|
|
|