@@ -18,9 +18,11 @@ package difflib
18
18
import (
19
19
"bufio"
20
20
"bytes"
21
+ "errors"
21
22
"fmt"
22
23
"io"
23
24
"strings"
25
+ "unicode"
24
26
)
25
27
26
28
func min (a , b int ) int {
@@ -44,6 +46,14 @@ func calculateRatio(matches, length int) float64 {
44
46
return 1.0
45
47
}
46
48
49
// listifyString splits str into a slice holding one single-character string
// per Unicode code point, in order.
//
// The result always has exactly one element per rune: ranging over a string
// yields byte offsets, so elements are appended rather than stored at the
// loop index, which would leave empty-string gaps after every multi-byte
// character. An empty input yields an empty, non-nil slice.
func listifyString(str string) (lst []string) {
	// len(str) is the byte length, an upper bound on the number of runes.
	lst = make([]string, 0, len(str))
	for _, c := range str {
		lst = append(lst, string(c))
	}
	return lst
}
56
+
47
57
type Match struct {
48
58
A int
49
59
B int
@@ -511,6 +521,227 @@ func (m *SequenceMatcher) RealQuickRatio() float64 {
511
521
return calculateRatio (min (la , lb ), la + lb )
512
522
}
513
523
524
// count_leading reports how many consecutive bytes equal to ch appear at the
// very start of line. It returns 0 for an empty line and len(line) when every
// byte matches.
func count_leading(line string, ch byte) (count int) {
	for count = 0; count < len(line); count++ {
		if line[count] != ch {
			// First byte that differs ends the leading run.
			break
		}
	}
	return count
}
533
+
534
// Differ produces line-by-line deltas between two sequences of strings,
// with intraline change markers for lines that are similar.
//
// Linejunk is handed to the sequence matcher that Compare builds over whole
// lines; Charjunk is handed to the matcher that FancyReplace builds for
// character-level comparison of individual lines. Either may be left nil.
// NOTE(review): presumably each predicate returns true for elements that
// should be treated as junk -- confirm against NewMatcherWithJunk.
type Differ struct {
	Linejunk, Charjunk func(string) bool
}
538
+
539
+ func NewDiffer () * Differ {
540
+ return & Differ {}
541
+ }
542
+
543
+ func (d * Differ ) Compare (a []string , b []string ) (diffs []string , err error ) {
544
+ // Compare two sequences of lines; generate the resulting delta.
545
+
546
+ // Each sequence must contain individual single-line strings ending with
547
+ // newlines. Such sequences can be obtained from the `readlines()` method
548
+ // of file-like objects. The delta generated also consists of newline-
549
+ // terminated strings, ready to be printed as-is via the writeline()
550
+ // method of a file-like object.
551
+ diffs = []string {}
552
+ cruncher := NewMatcherWithJunk (a , b , true , d .Linejunk )
553
+ opcodes := cruncher .GetOpCodes ()
554
+ for _ , current := range opcodes {
555
+ alo := current .I1
556
+ ahi := current .I2
557
+ blo := current .J1
558
+ bhi := current .J2
559
+ var g []string
560
+ if current .Tag == 'r' {
561
+ g , _ = d .FancyReplace (a , alo , ahi , b , blo , bhi )
562
+ } else if current .Tag == 'd' {
563
+ g = d .Dump ("-" , a , alo , ahi )
564
+ } else if current .Tag == 'i' {
565
+ g = d .Dump ("+" , b , blo , bhi )
566
+ } else if current .Tag == 'e' {
567
+ g = d .Dump (" " , a , alo , ahi )
568
+ } else {
569
+ return nil , errors .New (fmt .Sprintf ("unknown tag %q" , current .Tag ))
570
+ }
571
+ diffs = append (diffs , g ... )
572
+ }
573
+ return diffs , nil
574
+ }
575
+
576
+ func (d * Differ ) Dump (tag string , x []string , lo int , hi int ) (out []string ) {
577
+ // Generate comparison results for a same-tagged range.
578
+ out = []string {}
579
+ for i := lo ; i < hi ; i ++ {
580
+ out = append (out , fmt .Sprintf ("%s %s" , tag , x [i ]))
581
+ }
582
+ return out
583
+ }
584
+
585
+ func (d * Differ ) PlainReplace (a []string , alo int , ahi int , b []string , blo int , bhi int ) (out []string , err error ) {
586
+ if ! (alo < ahi ) || ! (blo < bhi ) { // assertion
587
+ return nil , errors .New ("low greater than or equal to high" )
588
+ }
589
+ // dump the shorter block first -- reduces the burden on short-term
590
+ // memory if the blocks are of very different sizes
591
+ if bhi - blo < ahi - alo {
592
+ out = d .Dump ("+" , b , blo , bhi )
593
+ out = append (out , d .Dump ("-" , a , alo , ahi )... )
594
+ } else {
595
+ out = d .Dump ("-" , a , alo , ahi )
596
+ out = append (out , d .Dump ("+" , b , blo , bhi )... )
597
+ }
598
+ return out , nil
599
+ }
600
+
601
+ func (d * Differ ) FancyReplace (a []string , alo int , ahi int , b []string , blo int , bhi int ) (out []string , err error ) {
602
+ // When replacing one block of lines with another, search the blocks
603
+ // for *similar* lines; the best-matching pair (if any) is used as a
604
+ // synch point, and intraline difference marking is done on the
605
+ // similar pair. Lots of work, but often worth it.
606
+
607
+ // don't synch up unless the lines have a similarity score of at
608
+ // least cutoff; best_ratio tracks the best score seen so far
609
+ best_ratio := 0.74
610
+ cutoff := 0.75
611
+ cruncher := NewMatcherWithJunk (a , b , true , d .Charjunk )
612
+ eqi := - 1 // 1st indices of equal lines (if any)
613
+ eqj := - 1
614
+ out = []string {}
615
+
616
+ // search for the pair that matches best without being identical
617
+ // (identical lines must be junk lines, & we don't want to synch up
618
+ // on junk -- unless we have to)
619
+ var best_i , best_j int
620
+ for j := blo ; j < bhi ; j ++ {
621
+ bj := b [j ]
622
+ cruncher .SetSeq2 (listifyString (bj ))
623
+ for i := alo ; i < ahi ; i ++ {
624
+ ai := a [i ]
625
+ if ai == bj {
626
+ if eqi == - 1 {
627
+ eqi = i
628
+ eqj = j
629
+ }
630
+ continue
631
+ }
632
+ cruncher .SetSeq1 (listifyString (ai ))
633
+ // computing similarity is expensive, so use the quick
634
+ // upper bounds first -- have seen this speed up messy
635
+ // compares by a factor of 3.
636
+ // note that ratio() is only expensive to compute the first
637
+ // time it's called on a sequence pair; the expensive part
638
+ // of the computation is cached by cruncher
639
+ if cruncher .RealQuickRatio () > best_ratio &&
640
+ cruncher .QuickRatio () > best_ratio &&
641
+ cruncher .Ratio () > best_ratio {
642
+ best_ratio = cruncher .Ratio ()
643
+ best_i = i
644
+ best_j = j
645
+ }
646
+ }
647
+ }
648
+ if best_ratio < cutoff {
649
+ // no non-identical "pretty close" pair
650
+ if eqi == - 1 {
651
+ // no identical pair either -- treat it as a straight replace
652
+ out , _ = d .PlainReplace (a , alo , ahi , b , blo , bhi )
653
+ return out , nil
654
+ }
655
+ // no close pair, but an identical pair -- synch up on that
656
+ best_i = eqi
657
+ best_j = eqj
658
+ best_ratio = 1.0
659
+ } else {
660
+ // there's a close pair, so forget the identical pair (if any)
661
+ eqi = - 1
662
+ }
663
+ // a[best_i] very similar to b[best_j]; eqi is None iff they're not
664
+ // identical
665
+
666
+ // pump out diffs from before the synch point
667
+ out = append (out , d .fancyHelper (a , alo , best_i , b , blo , best_j )... )
668
+
669
+ // do intraline marking on the synch pair
670
+ aelt , belt := a [best_i ], b [best_j ]
671
+ if eqi == - 1 {
672
+ // pump out a '-', '?', '+', '?' quad for the synched lines
673
+ var atags , btags string
674
+ cruncher .SetSeqs (listifyString (aelt ), listifyString (belt ))
675
+ opcodes := cruncher .GetOpCodes ()
676
+ for _ , current := range opcodes {
677
+ ai1 := current .I1
678
+ ai2 := current .I2
679
+ bj1 := current .J1
680
+ bj2 := current .J2
681
+ la , lb := ai2 - ai1 , bj2 - bj1
682
+ if current .Tag == 'r' {
683
+ atags += strings .Repeat ("^" , la )
684
+ btags += strings .Repeat ("^" , lb )
685
+ } else if current .Tag == 'd' {
686
+ atags += strings .Repeat ("-" , la )
687
+ } else if current .Tag == 'i' {
688
+ btags += strings .Repeat ("+" , lb )
689
+ } else if current .Tag == 'e' {
690
+ atags += strings .Repeat (" " , la )
691
+ btags += strings .Repeat (" " , lb )
692
+ } else {
693
+ return nil , errors .New (fmt .Sprintf ("unknown tag %q" ,
694
+ current .Tag ))
695
+ }
696
+ }
697
+ out = append (out , d .QFormat (aelt , belt , atags , btags )... )
698
+ } else {
699
+ // the synch pair is identical
700
+ out = append (out , " " + aelt )
701
+ }
702
+ // pump out diffs from after the synch point
703
+ out = append (out , d .fancyHelper (a , best_i + 1 , ahi , b , best_j + 1 , bhi )... )
704
+ return out , nil
705
+ }
706
+
707
+ func (d * Differ ) fancyHelper (a []string , alo int , ahi int , b []string , blo int , bhi int ) (out []string ) {
708
+ if alo < ahi {
709
+ if blo < bhi {
710
+ out , _ = d .FancyReplace (a , alo , ahi , b , blo , bhi )
711
+ } else {
712
+ out = d .Dump ("-" , a , alo , ahi )
713
+ }
714
+ } else if blo < bhi {
715
+ out = d .Dump ("+" , b , blo , bhi )
716
+ } else {
717
+ out = []string {}
718
+ }
719
+ return out
720
+ }
721
+
722
+ func (d * Differ ) QFormat (aline string , bline string , atags string , btags string ) (out []string ) {
723
+ // Format "?" output and deal with leading tabs.
724
+
725
+ // Can hurt, but will probably help most of the time.
726
+ common := min (count_leading (aline , '\t' ), count_leading (bline , '\t' ))
727
+ common = min (common , count_leading (atags [:common ], ' ' ))
728
+ common = min (common , count_leading (btags [:common ], ' ' ))
729
+ atags = strings .TrimRightFunc (atags [common :], unicode .IsSpace )
730
+ btags = strings .TrimRightFunc (btags [common :], unicode .IsSpace )
731
+
732
+ out = []string {"- " + aline }
733
+ if len (atags ) > 0 {
734
+ out = append (out , fmt .Sprintf ("? %s%s\n " ,
735
+ strings .Repeat ("\t " , common ), atags ))
736
+ }
737
+ out = append (out , "+ " + bline )
738
+ if len (btags ) > 0 {
739
+ out = append (out , fmt .Sprintf ("? %s%s\n " ,
740
+ strings .Repeat ("\t " , common ), btags ))
741
+ }
742
+ return out
743
+ }
744
+
514
745
// Convert range to the "ed" format
515
746
func formatRangeUnified (start , stop int ) string {
516
747
// Per the diff spec at http://www.unix.org/single_unix_specification/
0 commit comments