Skip to content

Commit ecda353

Browse files
authored
Merge pull request #1 from IanBruene/differ
Added port of Differ class, and tests.
2 parents 792786c + db85041 commit ecda353

File tree

2 files changed

+354
-0
lines changed

2 files changed

+354
-0
lines changed

difflib/difflib.go

+231
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ package difflib
1818
import (
1919
"bufio"
2020
"bytes"
21+
"errors"
2122
"fmt"
2223
"io"
2324
"strings"
25+
"unicode"
2426
)
2527

2628
func min(a, b int) int {
@@ -44,6 +46,14 @@ func calculateRatio(matches, length int) float64 {
4446
return 1.0
4547
}
4648

49+
// listifyString splits str into a slice of one-character strings, one element
// per Unicode code point, in order.
//
// Elements are appended rather than stored at the range index: ranging over a
// string yields byte offsets, so indexing a slice sized by len(str) leaves
// empty-string holes after every multi-byte rune and empty elements at the end.
// len(str) is still used as the capacity because it is an upper bound on the
// number of runes.
func listifyString(str string) (lst []string) {
	lst = make([]string, 0, len(str))
	for _, c := range str {
		lst = append(lst, string(c))
	}
	return lst
}
56+
4757
type Match struct {
4858
A int
4959
B int
@@ -511,6 +521,227 @@ func (m *SequenceMatcher) RealQuickRatio() float64 {
511521
return calculateRatio(min(la, lb), la+lb)
512522
}
513523

524+
// count_leading reports how many consecutive ch bytes line starts with.
// It returns 0 for an empty line and len(line) when every byte equals ch.
func count_leading(line string, ch byte) (count int) {
	for count = 0; count < len(line); count++ {
		if line[count] != ch {
			break
		}
	}
	return count
}
533+
534+
// Differ produces line-oriented deltas between two sequences of lines. Its
// two fields are optional junk predicates; NewDiffer leaves both nil.
type Differ struct {
535+
// Linejunk is handed to NewMatcherWithJunk by Compare, where the matched
// elements are whole lines.
Linejunk func(string) bool
536+
// Charjunk is handed to NewMatcherWithJunk by FancyReplace, where the
// matched elements are the single-character strings of one line.
Charjunk func(string) bool
537+
}
538+
539+
func NewDiffer() *Differ {
540+
return &Differ{}
541+
}
542+
543+
func (d *Differ) Compare(a []string, b []string) (diffs []string, err error) {
544+
// Compare two sequences of lines; generate the resulting delta.
545+
546+
// Each sequence must contain individual single-line strings ending with
547+
// newlines. Such sequences can be obtained from the `readlines()` method
548+
// of file-like objects. The delta generated also consists of newline-
549+
// terminated strings, ready to be printed as-is via the writeline()
550+
// method of a file-like object.
551+
diffs = []string{}
552+
cruncher := NewMatcherWithJunk(a, b, true, d.Linejunk)
553+
opcodes := cruncher.GetOpCodes()
554+
for _, current := range opcodes {
555+
alo := current.I1
556+
ahi := current.I2
557+
blo := current.J1
558+
bhi := current.J2
559+
var g []string
560+
if current.Tag == 'r' {
561+
g, _ = d.FancyReplace(a, alo, ahi, b, blo, bhi)
562+
} else if current.Tag == 'd' {
563+
g = d.Dump("-", a, alo, ahi)
564+
} else if current.Tag == 'i' {
565+
g = d.Dump("+", b, blo, bhi)
566+
} else if current.Tag == 'e' {
567+
g = d.Dump(" ", a, alo, ahi)
568+
} else {
569+
return nil, errors.New(fmt.Sprintf("unknown tag %q", current.Tag))
570+
}
571+
diffs = append(diffs, g...)
572+
}
573+
return diffs, nil
574+
}
575+
576+
func (d *Differ) Dump(tag string, x []string, lo int, hi int) (out []string) {
577+
// Generate comparison results for a same-tagged range.
578+
out = []string{}
579+
for i := lo; i < hi; i++ {
580+
out = append(out, fmt.Sprintf("%s %s", tag, x[i]))
581+
}
582+
return out
583+
}
584+
585+
func (d *Differ) PlainReplace(a []string, alo int, ahi int, b []string, blo int, bhi int) (out []string, err error) {
586+
if !(alo < ahi) || !(blo < bhi) { // assertion
587+
return nil, errors.New("low greater than or equal to high")
588+
}
589+
// dump the shorter block first -- reduces the burden on short-term
590+
// memory if the blocks are of very different sizes
591+
if bhi-blo < ahi-alo {
592+
out = d.Dump("+", b, blo, bhi)
593+
out = append(out, d.Dump("-", a, alo, ahi)...)
594+
} else {
595+
out = d.Dump("-", a, alo, ahi)
596+
out = append(out, d.Dump("+", b, blo, bhi)...)
597+
}
598+
return out, nil
599+
}
600+
601+
func (d *Differ) FancyReplace(a []string, alo int, ahi int, b []string, blo int, bhi int) (out []string, err error) {
602+
// When replacing one block of lines with another, search the blocks
603+
// for *similar* lines; the best-matching pair (if any) is used as a
604+
// synch point, and intraline difference marking is done on the
605+
// similar pair. Lots of work, but often worth it.
606+
607+
// don't synch up unless the lines have a similarity score of at
608+
// least cutoff; best_ratio tracks the best score seen so far
609+
best_ratio := 0.74
610+
cutoff := 0.75
611+
cruncher := NewMatcherWithJunk(a, b, true, d.Charjunk)
612+
eqi := -1 // 1st indices of equal lines (if any)
613+
eqj := -1
614+
out = []string{}
615+
616+
// search for the pair that matches best without being identical
617+
// (identical lines must be junk lines, & we don't want to synch up
618+
// on junk -- unless we have to)
619+
var best_i, best_j int
620+
for j := blo; j < bhi; j++ {
621+
bj := b[j]
622+
cruncher.SetSeq2(listifyString(bj))
623+
for i := alo; i < ahi; i++ {
624+
ai := a[i]
625+
if ai == bj {
626+
if eqi == -1 {
627+
eqi = i
628+
eqj = j
629+
}
630+
continue
631+
}
632+
cruncher.SetSeq1(listifyString(ai))
633+
// computing similarity is expensive, so use the quick
634+
// upper bounds first -- have seen this speed up messy
635+
// compares by a factor of 3.
636+
// note that ratio() is only expensive to compute the first
637+
// time it's called on a sequence pair; the expensive part
638+
// of the computation is cached by cruncher
639+
if cruncher.RealQuickRatio() > best_ratio &&
640+
cruncher.QuickRatio() > best_ratio &&
641+
cruncher.Ratio() > best_ratio {
642+
best_ratio = cruncher.Ratio()
643+
best_i = i
644+
best_j = j
645+
}
646+
}
647+
}
648+
if best_ratio < cutoff {
649+
// no non-identical "pretty close" pair
650+
if eqi == -1 {
651+
// no identical pair either -- treat it as a straight replace
652+
out, _ = d.PlainReplace(a, alo, ahi, b, blo, bhi)
653+
return out, nil
654+
}
655+
// no close pair, but an identical pair -- synch up on that
656+
best_i = eqi
657+
best_j = eqj
658+
best_ratio = 1.0
659+
} else {
660+
// there's a close pair, so forget the identical pair (if any)
661+
eqi = -1
662+
}
663+
// a[best_i] very similar to b[best_j]; eqi is None iff they're not
664+
// identical
665+
666+
// pump out diffs from before the synch point
667+
out = append(out, d.fancyHelper(a, alo, best_i, b, blo, best_j)...)
668+
669+
// do intraline marking on the synch pair
670+
aelt, belt := a[best_i], b[best_j]
671+
if eqi == -1 {
672+
// pump out a '-', '?', '+', '?' quad for the synched lines
673+
var atags, btags string
674+
cruncher.SetSeqs(listifyString(aelt), listifyString(belt))
675+
opcodes := cruncher.GetOpCodes()
676+
for _, current := range opcodes {
677+
ai1 := current.I1
678+
ai2 := current.I2
679+
bj1 := current.J1
680+
bj2 := current.J2
681+
la, lb := ai2-ai1, bj2-bj1
682+
if current.Tag == 'r' {
683+
atags += strings.Repeat("^", la)
684+
btags += strings.Repeat("^", lb)
685+
} else if current.Tag == 'd' {
686+
atags += strings.Repeat("-", la)
687+
} else if current.Tag == 'i' {
688+
btags += strings.Repeat("+", lb)
689+
} else if current.Tag == 'e' {
690+
atags += strings.Repeat(" ", la)
691+
btags += strings.Repeat(" ", lb)
692+
} else {
693+
return nil, errors.New(fmt.Sprintf("unknown tag %q",
694+
current.Tag))
695+
}
696+
}
697+
out = append(out, d.QFormat(aelt, belt, atags, btags)...)
698+
} else {
699+
// the synch pair is identical
700+
out = append(out, " "+aelt)
701+
}
702+
// pump out diffs from after the synch point
703+
out = append(out, d.fancyHelper(a, best_i+1, ahi, b, best_j+1, bhi)...)
704+
return out, nil
705+
}
706+
707+
func (d *Differ) fancyHelper(a []string, alo int, ahi int, b []string, blo int, bhi int) (out []string) {
708+
if alo < ahi {
709+
if blo < bhi {
710+
out, _ = d.FancyReplace(a, alo, ahi, b, blo, bhi)
711+
} else {
712+
out = d.Dump("-", a, alo, ahi)
713+
}
714+
} else if blo < bhi {
715+
out = d.Dump("+", b, blo, bhi)
716+
} else {
717+
out = []string{}
718+
}
719+
return out
720+
}
721+
722+
func (d *Differ) QFormat(aline string, bline string, atags string, btags string) (out []string) {
723+
// Format "?" output and deal with leading tabs.
724+
725+
// Can hurt, but will probably help most of the time.
726+
common := min(count_leading(aline, '\t'), count_leading(bline, '\t'))
727+
common = min(common, count_leading(atags[:common], ' '))
728+
common = min(common, count_leading(btags[:common], ' '))
729+
atags = strings.TrimRightFunc(atags[common:], unicode.IsSpace)
730+
btags = strings.TrimRightFunc(btags[common:], unicode.IsSpace)
731+
732+
out = []string{"- " + aline}
733+
if len(atags) > 0 {
734+
out = append(out, fmt.Sprintf("? %s%s\n",
735+
strings.Repeat("\t", common), atags))
736+
}
737+
out = append(out, "+ "+bline)
738+
if len(btags) > 0 {
739+
out = append(out, fmt.Sprintf("? %s%s\n",
740+
strings.Repeat("\t", common), btags))
741+
}
742+
return out
743+
}
744+
514745
// Convert range to the "ed" format
515746
func formatRangeUnified(start, stop int) string {
516747
// Per the diff spec at http://www.unix.org/single_unix_specification/

difflib/difflib_test.go

+123
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ func splitChars(s string) []string {
3030
return chars
3131
}
3232

33+
func TestlistifyString(t *testing.T) {
34+
lst := listifyString("qwerty")
35+
if reflect.DeepEqual(lst, []string{"q", "w", "e", "r", "t", "y"}) != true {
36+
t.Fatal("listifyString failure:", lst)
37+
}
38+
}
39+
3340
func TestSequenceMatcherRatio(t *testing.T) {
3441
s := NewMatcher(splitChars("abcd"), splitChars("bcde"))
3542
assertEqual(t, s.Ratio(), 0.75)
@@ -424,3 +431,119 @@ func BenchmarkSplitLines100(b *testing.B) {
424431
// BenchmarkSplitLines10000 runs the shared benchmarkSplitLines helper with
// 10000 as its second argument.
func BenchmarkSplitLines10000(b *testing.B) {
425432
benchmarkSplitLines(b, 10000)
426433
}
434+
435+
func TestDifferCompare(t *testing.T) {
436+
diff := NewDiffer()
437+
// Test
438+
aLst := []string{"foo\n", "bar\n", "baz\n"}
439+
bLst := []string{"foo\n", "bar1\n", "asdf\n", "baz\n"}
440+
out, err := diff.Compare(aLst, bLst)
441+
if err != nil {
442+
t.Fatal("Differ Compare() error:", err)
443+
}
444+
if reflect.DeepEqual(out, []string{
445+
" foo\n",
446+
"- bar\n",
447+
"+ bar1\n",
448+
"? +\n",
449+
"+ asdf\n",
450+
" baz\n",
451+
}) != true {
452+
t.Fatal("Differ Compare failure:", out)
453+
}
454+
}
455+
456+
func TestDifferDump(t *testing.T) {
457+
diff := NewDiffer()
458+
out := diff.Dump("+",
459+
[]string{"foo", "bar", "baz", "quux", "qwerty"},
460+
1, 3)
461+
if reflect.DeepEqual(out, []string{"+ bar", "+ baz"}) != true {
462+
t.Fatal("Differ Dump() failure:", out)
463+
}
464+
}
465+
466+
func TestDifferPlainReplace(t *testing.T) {
467+
diff := NewDiffer()
468+
aLst := []string{"one\n", "two\n", "three\n", "four\n", "five\n"}
469+
bLst := []string{"one\n", "two2\n", "three\n", "extra\n"}
470+
// Test a then b
471+
out, err := diff.PlainReplace(aLst, 1, 2, bLst, 1, 2)
472+
if err != nil {
473+
t.Fatal("Differ PlainReplace() error:", err)
474+
}
475+
if reflect.DeepEqual(out, []string{"- two\n", "+ two2\n"}) != true {
476+
t.Fatal("Differ PlainReplace() failure:", out)
477+
}
478+
// Test b then a
479+
out, err = diff.PlainReplace(aLst, 3, 5, bLst, 3, 4)
480+
if err != nil {
481+
t.Fatal("Differ PlainReplace() error:", err)
482+
}
483+
if reflect.DeepEqual(out,
484+
[]string{"+ extra\n", "- four\n", "- five\n"}) != true {
485+
t.Fatal("Differ PlainReplace() failure:", out)
486+
}
487+
}
488+
489+
func TestDifferFancyReplaceAndHelper(t *testing.T) {
490+
diff := NewDiffer()
491+
// Test identical sync point, both full
492+
aLst := []string{"one\n", "asdf\n", "three\n"}
493+
bLst := []string{"one\n", "two2\n", "three\n"}
494+
out, err := diff.FancyReplace(aLst, 0, 3, bLst, 0, 3)
495+
if err != nil {
496+
t.Fatal("Differ FancyReplace() error:", err)
497+
}
498+
if reflect.DeepEqual(out,
499+
[]string{" one\n", "- asdf\n", "+ two2\n", " three\n"}) != true {
500+
t.Fatal("Differ FancyReplace() failure:", out)
501+
}
502+
// Test close sync point, both full
503+
aLst = []string{"one\n", "two123456\n", "asdf\n", "three\n"}
504+
bLst = []string{"one\n", "two123457\n", "qwerty\n", "three\n"}
505+
out, err = diff.FancyReplace(aLst, 1, 3, bLst, 1, 3)
506+
if err != nil {
507+
t.Fatal("Differ FancyReplace() error:", err)
508+
}
509+
if reflect.DeepEqual(out, []string{
510+
"- two123456\n",
511+
"? ^\n",
512+
"+ two123457\n",
513+
"? ^\n",
514+
"- asdf\n",
515+
"+ qwerty\n",
516+
}) != true {
517+
t.Fatal("Differ FancyReplace() failure:", out)
518+
}
519+
// Test no identical no close
520+
aLst = []string{"one\n", "asdf\n", "three\n"}
521+
bLst = []string{"one\n", "qwerty\n", "three\n"}
522+
out, err = diff.FancyReplace(aLst, 1, 2, bLst, 1, 2)
523+
if err != nil {
524+
t.Fatal("Differ FancyReplace() error:", err)
525+
}
526+
if reflect.DeepEqual(out, []string{
527+
"- asdf\n",
528+
"+ qwerty\n",
529+
}) != true {
530+
t.Fatal("Differ FancyReplace() failure:", out)
531+
}
532+
}
533+
534+
func TestDifferQFormat(t *testing.T) {
535+
diff := NewDiffer()
536+
aStr := "\tfoo2bar\n"
537+
aTag := " ^ ^"
538+
bStr := "\tfoo3baz\n"
539+
bTag := " ^ ^"
540+
out := diff.QFormat(aStr, bStr, aTag, bTag)
541+
if reflect.DeepEqual(out, []string{
542+
"- \tfoo2bar\n",
543+
"? \t ^ ^\n",
544+
"+ \tfoo3baz\n",
545+
"? \t ^ ^\n",
546+
}) != true {
547+
t.Fatal("Differ QFormat() failure:", out)
548+
}
549+
}

0 commit comments

Comments
 (0)