From a0505d8e3d2cae794e2c935a304dd9d8e96ccfa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lovro=20Ma=C5=BEgon?= <lovro.mazgon@gmail.com>
Date: Thu, 15 Feb 2024 20:42:10 +0100
Subject: [PATCH] add unified diff lib

---
 .../processor/builtin/internal/diff/README.md |  16 +
 .../processor/builtin/internal/diff/diff.go   | 176 +++++++
 .../builtin/internal/diff/diff_test.go        | 207 ++++++++
 .../internal/diff/difftest/difftest.go        | 324 ++++++++++++
 .../internal/diff/difftest/difftest_test.go   |  82 +++
 .../builtin/internal/diff/export_test.go      |   9 +
 .../builtin/internal/diff/lcs/common.go       | 179 +++++++
 .../builtin/internal/diff/lcs/common_test.go  | 140 +++++
 .../builtin/internal/diff/lcs/doc.go          | 156 ++++++
 .../builtin/internal/diff/lcs/git.sh          |  33 ++
 .../builtin/internal/diff/lcs/labels.go       |  55 ++
 .../builtin/internal/diff/lcs/old.go          | 480 ++++++++++++++++++
 .../builtin/internal/diff/lcs/old_test.go     | 251 +++++++++
 .../builtin/internal/diff/lcs/sequence.go     | 113 +++++
 .../builtin/internal/diff/myers/diff.go       | 246 +++++++++
 .../builtin/internal/diff/myers/diff_test.go  |  16 +
 .../processor/builtin/internal/diff/ndiff.go  |  99 ++++
 .../builtin/internal/diff/testenv/testenv.go  | 199 ++++++++
 .../builtin/internal/diff/unified.go          | 251 +++++++++
 19 files changed, 3032 insertions(+)
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/README.md
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/diff.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/diff_test.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/difftest/difftest.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/difftest/difftest_test.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/export_test.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/common.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/common_test.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/doc.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/git.sh
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/labels.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/old.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/old_test.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/lcs/sequence.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/myers/diff.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/myers/diff_test.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/ndiff.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/testenv/testenv.go
 create mode 100644 pkg/plugin/processor/builtin/internal/diff/unified.go

diff --git a/pkg/plugin/processor/builtin/internal/diff/README.md b/pkg/plugin/processor/builtin/internal/diff/README.md
new file mode 100644
index 000000000..09985b6c8
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/README.md
@@ -0,0 +1,16 @@
+# Diff
+
+This package contains code taken from https://github.com/golang/tools/tree/master/internal/diff
+on February 15th, 2024. We need the code to create a unified diff between two strings.
+
+The code is left as-is, except for two changes:
+
+- The imports were changed to reference the Conduit module path. This was done
+  using the following command:
+
+  ```sh
+  find . -type f -exec sed -i '' 's/golang.org\/x\/tools\/internal/github.com\/conduitio\/conduit\/pkg\/plugin\/processor\/builtin\/internal/g' {} +
+  ```
+
+- The package `golang.org/x/tools/internal/testenv` was copied into the `diff` package
+  (as `diff/testenv`), as that's the only place it's used. Only the required functions are included.
diff --git a/pkg/plugin/processor/builtin/internal/diff/diff.go b/pkg/plugin/processor/builtin/internal/diff/diff.go
new file mode 100644
index 000000000..a13547b7a
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/diff.go
@@ -0,0 +1,176 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package diff computes differences between text files or strings.
+package diff
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+)
+
+// An Edit describes the replacement of a portion of a text file.
+type Edit struct {
+	Start, End int    // byte offsets of the region to replace
+	New        string // the replacement
+}
+
+func (e Edit) String() string {
+	return fmt.Sprintf("{Start:%d,End:%d,New:%q}", e.Start, e.End, e.New)
+}
+
+// Apply applies a sequence of edits to the src buffer and returns the
+// result. Edits are applied in order of start offset; edits with the
+// same start offset are applied in the order they were provided.
+//
+// Apply returns an error if any edit is out of bounds,
+// or if any pair of edits is overlapping.
+func Apply(src string, edits []Edit) (string, error) {
+	edits, size, err := validate(src, edits)
+	if err != nil {
+		return "", err
+	}
+
+	// Apply edits, copying the untouched text of src between them.
+	out := make([]byte, 0, size)
+	lastEnd := 0
+	for _, edit := range edits {
+		if lastEnd < edit.Start {
+			out = append(out, src[lastEnd:edit.Start]...)
+		}
+		out = append(out, edit.New...)
+		lastEnd = edit.End
+	}
+	out = append(out, src[lastEnd:]...)
+
+	if len(out) != size {
+		panic("wrong size")
+	}
+
+	return string(out), nil
+}
+
+// ApplyBytes is like Apply, but it accepts a byte slice.
+// The result is always a new array.
+func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
+	res, err := Apply(string(src), edits)
+	return []byte(res), err
+}
+
+// validate checks that edits are in bounds of src and do not overlap,
+// and returns the sorted edits and the size of the patched output.
+// The returned slice is a sorted copy if edits was not already sorted.
+func validate(src string, edits []Edit) ([]Edit, int, error) {
+	if !sort.IsSorted(editsSort(edits)) {
+		edits = append([]Edit(nil), edits...)
+		SortEdits(edits)
+	}
+
+	// Check validity of edits and compute final size.
+	size := len(src)
+	lastEnd := 0
+	for _, edit := range edits {
+		if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
+			return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
+		}
+		if edit.Start < lastEnd {
+			return nil, 0, fmt.Errorf("diff has overlapping edits")
+		}
+		size += len(edit.New) + edit.Start - edit.End
+		lastEnd = edit.End
+	}
+
+	return edits, size, nil
+}
+
+// SortEdits orders a slice of Edits by (start, end) offset.
+// This ordering puts insertions (end = start) before deletions
+// (end > start) at the same point, but uses a stable sort to preserve
+// the order of multiple insertions at the same point.
+// (Apply detects multiple deletions at the same point as an error.)
+func SortEdits(edits []Edit) {
+	sort.Stable(editsSort(edits))
+}
+
+type editsSort []Edit
+
+func (a editsSort) Len() int { return len(a) }
+func (a editsSort) Less(i, j int) bool {
+	if cmp := a[i].Start - a[j].Start; cmp != 0 {
+		return cmp < 0
+	}
+	return a[i].End < a[j].End
+}
+func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+// lineEdits expands and merges a sequence of edits so that each
+// resulting edit replaces one or more complete lines.
+// See Apply for preconditions.
+func lineEdits(src string, edits []Edit) ([]Edit, error) {
+	edits, _, err := validate(src, edits)
+	if err != nil {
+		return nil, err
+	}
+
+	// Do all deletions begin and end at the start of a line,
+	// and all insertions end with a newline?
+	// (This is merely a fast path.)
+	for _, edit := range edits {
+		if edit.Start >= len(src) || // insertion at EOF
+			edit.Start > 0 && src[edit.Start-1] != '\n' || // start not at line start
+			edit.End > 0 && src[edit.End-1] != '\n' || // end not at line start
+			edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert
+			goto expand // slow path
+		}
+	}
+	return edits, nil // aligned
+
+expand:
+	if len(edits) == 0 {
+		return edits, nil // no edits (unreachable due to fast path)
+	}
+	expanded := make([]Edit, 0, len(edits)) // a guess
+	prev := edits[0]
+	// TODO(adonovan): opt: start from the first misaligned edit.
+	// TODO(adonovan): opt: avoid quadratic cost of string += string.
+	for _, edit := range edits[1:] {
+		between := src[prev.End:edit.Start]
+		if !strings.Contains(between, "\n") {
+			// overlapping lines: combine with previous edit.
+			prev.New += between + edit.New
+			prev.End = edit.End
+		} else {
+			// non-overlapping lines: flush previous edit.
+			expanded = append(expanded, expandEdit(prev, src))
+			prev = edit
+		}
+	}
+	return append(expanded, expandEdit(prev, src)), nil // flush final edit
+}
+
+// expandEdit returns edit expanded to complete whole lines.
+func expandEdit(edit Edit, src string) Edit {
+	// Expand start left to start of line.
+	// (delta is the zero-based column number of start.)
+	start := edit.Start
+	if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
+		edit.Start -= delta
+		edit.New = src[start-delta:start] + edit.New
+	}
+
+	// Expand end right to end of line.
+	end := edit.End
+	if end > 0 && src[end-1] != '\n' ||
+		edit.New != "" && edit.New[len(edit.New)-1] != '\n' {
+		if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
+			edit.End = len(src) // extend to EOF
+		} else {
+			edit.End = end + nl + 1 // extend beyond \n
+		}
+	}
+	edit.New += src[end:edit.End]
+
+	return edit
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/diff_test.go b/pkg/plugin/processor/builtin/internal/diff/diff_test.go
new file mode 100644
index 000000000..055384679
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/diff_test.go
@@ -0,0 +1,207 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff_test
+
+import (
+	"bytes"
+	"math/rand"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"testing"
+	"unicode/utf8"
+
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff"
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest"
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/testenv"
+)
+
+func TestApply(t *testing.T) {
+	for _, tc := range difftest.TestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			got, err := diff.Apply(tc.In, tc.Edits)
+			if err != nil {
+				t.Fatalf("Apply(Edits) failed: %v", err)
+			}
+			if got != tc.Out {
+				t.Errorf("Apply(Edits): got %q, want %q", got, tc.Out)
+			}
+			if tc.LineEdits != nil {
+				got, err := diff.Apply(tc.In, tc.LineEdits)
+				if err != nil {
+					t.Fatalf("Apply(LineEdits) failed: %v", err)
+				}
+				if got != tc.Out {
+					t.Errorf("Apply(LineEdits): got %q, want %q", got, tc.Out)
+				}
+			}
+		})
+	}
+}
+
+func TestNEdits(t *testing.T) {
+	for _, tc := range difftest.TestCases {
+		edits := diff.Strings(tc.In, tc.Out)
+		got, err := diff.Apply(tc.In, edits)
+		if err != nil {
+			t.Fatalf("Apply failed: %v", err)
+		}
+		if got != tc.Out {
+			t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out)
+		}
+		if len(edits) < len(tc.Edits) { // should find subline edits
+			t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc)
+		}
+	}
+}
+
+func TestNRandom(t *testing.T) {
+	rand.Seed(1)
+	for i := 0; i < 1000; i++ {
+		a := randstr("abω", 16)
+		b := randstr("abωc", 16)
+		edits := diff.Strings(a, b)
+		got, err := diff.Apply(a, edits)
+		if err != nil {
+			t.Fatalf("Apply failed: %v", err)
+		}
+		if got != b {
+			t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a)
+		}
+	}
+}
+
+// $ go test -fuzz=FuzzRoundTrip ./internal/diff
+func FuzzRoundTrip(f *testing.F) {
+	f.Fuzz(func(t *testing.T, a, b string) {
+		if !utf8.ValidString(a) || !utf8.ValidString(b) {
+			return // inputs must be text
+		}
+		edits := diff.Strings(a, b)
+		got, err := diff.Apply(a, edits)
+		if err != nil {
+			t.Fatalf("Apply failed: %v", err)
+		}
+		if got != b {
+			t.Fatalf("applying diff(%q, %q) gives %q; edits=%v", a, b, got, edits)
+		}
+	})
+}
+
+func TestLineEdits(t *testing.T) {
+	for _, tc := range difftest.TestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			want := tc.LineEdits
+			if want == nil {
+				want = tc.Edits // already line-aligned
+			}
+			got, err := diff.LineEdits(tc.In, tc.Edits)
+			if err != nil {
+				t.Fatalf("LineEdits: %v", err)
+			}
+			if !reflect.DeepEqual(got, want) {
+				t.Errorf("in=<<%s>>\nout=<<%s>>\nraw  edits=%s\nline edits=%s\nwant: %s",
+					tc.In, tc.Out, tc.Edits, got, want)
+			}
+			// make sure that applying the edits gives the expected result
+			fixed, err := diff.Apply(tc.In, got)
+			if err != nil {
+				t.Error(err)
+			}
+			if fixed != tc.Out {
+				t.Errorf("Apply(LineEdits): got %q, want %q", fixed, tc.Out)
+			}
+		})
+	}
+}
+
+func TestToUnified(t *testing.T) {
+	testenv.NeedsTool(t, "patch")
+	for _, tc := range difftest.TestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if unified == "" {
+				return
+			}
+			orig := filepath.Join(t.TempDir(), "original")
+			err = os.WriteFile(orig, []byte(tc.In), 0644)
+			if err != nil {
+				t.Fatal(err)
+			}
+			temp := filepath.Join(t.TempDir(), "patched")
+			err = os.WriteFile(temp, []byte(tc.In), 0644)
+			if err != nil {
+				t.Fatal(err)
+			}
+			cmd := exec.Command("patch", "-p0", "-u", "-s", "-o", temp, orig)
+			cmd.Stdin = strings.NewReader(unified)
+			cmd.Stdout = new(bytes.Buffer)
+			cmd.Stderr = new(bytes.Buffer)
+			if err = cmd.Run(); err != nil {
+				t.Fatalf("%v: %q (%q) (%q)", err, cmd.String(),
+					cmd.Stderr, cmd.Stdout)
+			}
+			got, err := os.ReadFile(temp)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if string(got) != tc.Out {
+				t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q",
+					got, tc.Out, unified)
+			}
+
+		})
+	}
+}
+
+func TestRegressionOld001(t *testing.T) {
+	a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+
+	b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+	diffs := diff.Strings(a, b)
+	got, err := diff.Apply(a, diffs)
+	if err != nil {
+		t.Fatalf("Apply failed: %v", err)
+	}
+	if got != b { // on mismatch, also print both strings from the first differing byte
+		i := 0
+		for ; i < len(got) && i < len(b) && got[i] == b[i]; i++ { // bound by got (not a): got is what is indexed
+		}
+		t.Errorf("oops %v\n%q\n%q", diffs, got, b)
+		t.Errorf("\n%q\n%q", got[i:], b[i:])
+	}
+}
+
+func TestRegressionOld002(t *testing.T) {
+	a := "n\"\n)\n"
+	b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
+	diffs := diff.Strings(a, b)
+	got, err := diff.Apply(a, diffs)
+	if err != nil {
+		t.Fatalf("Apply failed: %v", err)
+	}
+	if got != b { // on mismatch, also print both strings from the first differing byte
+		i := 0
+		for ; i < len(got) && i < len(b) && got[i] == b[i]; i++ { // bound by got (not a): got is what is indexed
+		}
+		t.Errorf("oops %v\n%q\n%q", diffs, got, b)
+		t.Errorf("\n%q\n%q", got[i:], b[i:])
+	}
+}
+
+// randstr returns a random string of n runes, each drawn from the runes of s.
+func randstr(s string, n int) string {
+	src := []rune(s)
+	x := make([]rune, n)
+	for i := 0; i < n; i++ {
+		x[i] = src[rand.Intn(len(src))]
+	}
+	return string(x)
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/difftest/difftest.go b/pkg/plugin/processor/builtin/internal/diff/difftest/difftest.go
new file mode 100644
index 000000000..bdb51cfa6
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/difftest/difftest.go
@@ -0,0 +1,324 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package difftest supplies a set of tests that will operate on any
+// implementation of a diff algorithm as exposed by
+// "github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff"
+package difftest
+
+// There are two kinds of tests, semantic tests, and 'golden data' tests.
+// The semantic tests check that the computed diffs transform the input to
+// the output, and that 'patch' accepts the computed unified diffs.
+// The other tests just check that Edits and LineEdits haven't changed
+// unexpectedly. These fields may need to be changed when the diff algorithm
+// changes.
+
+import (
+	"testing"
+
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff"
+)
+
+const (
+	FileA         = "from"
+	FileB         = "to"
+	UnifiedPrefix = "--- " + FileA + "\n+++ " + FileB + "\n"
+)
+
+var TestCases = []struct {
+	Name, In, Out, Unified string
+	Edits, LineEdits       []diff.Edit // expectation (LineEdits=nil => already line-aligned)
+	NoDiff                 bool
+}{{
+	Name: "empty",
+	In:   "",
+	Out:  "",
+}, {
+	Name: "no_diff",
+	In:   "gargantuan\n",
+	Out:  "gargantuan\n",
+}, {
+	Name: "replace_all",
+	In:   "fruit\n",
+	Out:  "cheese\n",
+	Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-fruit
++cheese
+`[1:],
+	Edits:     []diff.Edit{{Start: 0, End: 5, New: "cheese"}},
+	LineEdits: []diff.Edit{{Start: 0, End: 6, New: "cheese\n"}},
+}, {
+	Name: "insert_rune",
+	In:   "gord\n",
+	Out:  "gourd\n",
+	Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-gord
++gourd
+`[1:],
+	Edits:     []diff.Edit{{Start: 2, End: 2, New: "u"}},
+	LineEdits: []diff.Edit{{Start: 0, End: 5, New: "gourd\n"}},
+}, {
+	Name: "delete_rune",
+	In:   "groat\n",
+	Out:  "goat\n",
+	Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-groat
++goat
+`[1:],
+	Edits:     []diff.Edit{{Start: 1, End: 2, New: ""}},
+	LineEdits: []diff.Edit{{Start: 0, End: 6, New: "goat\n"}},
+}, {
+	Name: "replace_rune",
+	In:   "loud\n",
+	Out:  "lord\n",
+	Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-loud
++lord
+`[1:],
+	Edits:     []diff.Edit{{Start: 2, End: 3, New: "r"}},
+	LineEdits: []diff.Edit{{Start: 0, End: 5, New: "lord\n"}},
+}, {
+	Name: "replace_partials",
+	In:   "blanket\n",
+	Out:  "bunker\n",
+	Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-blanket
++bunker
+`[1:],
+	Edits: []diff.Edit{
+		{Start: 1, End: 3, New: "u"},
+		{Start: 6, End: 7, New: "r"},
+	},
+	LineEdits: []diff.Edit{{Start: 0, End: 8, New: "bunker\n"}},
+}, {
+	Name: "insert_line",
+	In:   "1: one\n3: three\n",
+	Out:  "1: one\n2: two\n3: three\n",
+	Unified: UnifiedPrefix + `
+@@ -1,2 +1,3 @@
+ 1: one
++2: two
+ 3: three
+`[1:],
+	Edits: []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}},
+}, {
+	Name: "replace_no_newline",
+	In:   "A",
+	Out:  "B",
+	Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-A
+\ No newline at end of file
++B
+\ No newline at end of file
+`[1:],
+	Edits: []diff.Edit{{Start: 0, End: 1, New: "B"}},
+}, {
+	Name: "delete_empty",
+	In:   "meow",
+	Out:  "", // GNU diff -u special case: +0,0
+	Unified: UnifiedPrefix + `
+@@ -1 +0,0 @@
+-meow
+\ No newline at end of file
+`[1:],
+	Edits:     []diff.Edit{{Start: 0, End: 4, New: ""}},
+	LineEdits: []diff.Edit{{Start: 0, End: 4, New: ""}},
+}, {
+	Name: "append_empty",
+	In:   "", // GNU diff -u special case: -0,0
+	Out:  "AB\nC",
+	Unified: UnifiedPrefix + `
+@@ -0,0 +1,2 @@
++AB
++C
+\ No newline at end of file
+`[1:],
+	Edits:     []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+	LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+},
+	// TODO(adonovan): fix this test: GNU diff -u prints "+1,2", ToUnified prints "+1,3".
+	// 	{
+	// 		Name: "add_start",
+	// 		In:   "A",
+	// 		Out:  "B\nCA",
+	// 		Unified: UnifiedPrefix + `
+	// @@ -1 +1,2 @@
+	// -A
+	// \ No newline at end of file
+	// +B
+	// +CA
+	// \ No newline at end of file
+	// `[1:],
+	// 		Edits:     []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
+	// 		LineEdits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
+	// 	},
+	{
+		Name: "add_end",
+		In:   "A",
+		Out:  "AB",
+		Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-A
+\ No newline at end of file
++AB
+\ No newline at end of file
+`[1:],
+		Edits:     []diff.Edit{{Start: 1, End: 1, New: "B"}},
+		LineEdits: []diff.Edit{{Start: 0, End: 1, New: "AB"}},
+	}, {
+		Name: "add_empty",
+		In:   "",
+		Out:  "AB\nC",
+		Unified: UnifiedPrefix + `
+@@ -0,0 +1,2 @@
++AB
++C
+\ No newline at end of file
+`[1:],
+		Edits:     []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+		LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+	}, {
+		Name: "add_newline",
+		In:   "A",
+		Out:  "A\n",
+		Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-A
+\ No newline at end of file
++A
+`[1:],
+		Edits:     []diff.Edit{{Start: 1, End: 1, New: "\n"}},
+		LineEdits: []diff.Edit{{Start: 0, End: 1, New: "A\n"}},
+	}, {
+		Name: "delete_front",
+		In:   "A\nB\nC\nA\nB\nB\nA\n",
+		Out:  "C\nB\nA\nB\nA\nC\n",
+		Unified: UnifiedPrefix + `
+@@ -1,7 +1,6 @@
+-A
+-B
+ C
++B
+ A
+ B
+-B
+ A
++C
+`[1:],
+		NoDiff: true, // unified diff is different but valid
+		Edits: []diff.Edit{
+			{Start: 0, End: 4, New: ""},
+			{Start: 6, End: 6, New: "B\n"},
+			{Start: 10, End: 12, New: ""},
+			{Start: 14, End: 14, New: "C\n"},
+		},
+		LineEdits: []diff.Edit{
+			{Start: 0, End: 4, New: ""},
+			{Start: 6, End: 6, New: "B\n"},
+			{Start: 10, End: 12, New: ""},
+			{Start: 14, End: 14, New: "C\n"},
+		},
+	}, {
+		Name: "replace_last_line",
+		In:   "A\nB\n",
+		Out:  "A\nC\n\n",
+		Unified: UnifiedPrefix + `
+@@ -1,2 +1,3 @@
+ A
+-B
++C
++
+`[1:],
+		Edits:     []diff.Edit{{Start: 2, End: 3, New: "C\n"}},
+		LineEdits: []diff.Edit{{Start: 2, End: 4, New: "C\n\n"}},
+	},
+	{
+		Name: "multiple_replace",
+		In:   "A\nB\nC\nD\nE\nF\nG\n",
+		Out:  "A\nH\nI\nJ\nE\nF\nK\n",
+		Unified: UnifiedPrefix + `
+@@ -1,7 +1,7 @@
+ A
+-B
+-C
+-D
++H
++I
++J
+ E
+ F
+-G
++K
+`[1:],
+		Edits: []diff.Edit{
+			{Start: 2, End: 8, New: "H\nI\nJ\n"},
+			{Start: 12, End: 14, New: "K\n"},
+		},
+		NoDiff: true, // diff algorithm produces different delete/insert pattern
+	},
+	{
+		Name:  "extra_newline",
+		In:    "\nA\n",
+		Out:   "A\n",
+		Edits: []diff.Edit{{Start: 0, End: 1, New: ""}},
+		Unified: UnifiedPrefix + `@@ -1,2 +1 @@
+-
+ A
+`,
+	}, {
+		Name:      "unified_lines",
+		In:        "aaa\nccc\n",
+		Out:       "aaa\nbbb\nccc\n",
+		Edits:     []diff.Edit{{Start: 3, End: 3, New: "\nbbb"}},
+		LineEdits: []diff.Edit{{Start: 0, End: 4, New: "aaa\nbbb\n"}},
+		Unified:   UnifiedPrefix + "@@ -1,2 +1,3 @@\n aaa\n+bbb\n ccc\n",
+	}, {
+		Name: "60379",
+		In: `package a
+
+type S struct {
+s fmt.Stringer
+}
+`,
+		Out: `package a
+
+type S struct {
+	s fmt.Stringer
+}
+`,
+		Edits:     []diff.Edit{{Start: 27, End: 27, New: "\t"}},
+		LineEdits: []diff.Edit{{Start: 27, End: 42, New: "\ts fmt.Stringer\n"}},
+		Unified:   UnifiedPrefix + "@@ -1,5 +1,5 @@\n package a\n \n type S struct {\n-s fmt.Stringer\n+\ts fmt.Stringer\n }\n",
+	},
+}
+
+func DiffTest(t *testing.T, compute func(before, after string) []diff.Edit) {
+	for _, test := range TestCases {
+		t.Run(test.Name, func(t *testing.T) {
+			edits := compute(test.In, test.Out)
+			got, err := diff.Apply(test.In, edits)
+			if err != nil {
+				t.Fatalf("Apply failed: %v", err)
+			}
+			unified, err := diff.ToUnified(FileA, FileB, test.In, edits, diff.DefaultContextLines)
+			if err != nil {
+				t.Fatalf("ToUnified: %v", err)
+			}
+			if got != test.Out {
+				t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v",
+					got, unified, test.Out)
+			}
+			if !test.NoDiff && unified != test.Unified {
+				t.Errorf("Unified: got diff:\n%q\nexpected:\n%q diffs:%v",
+					unified, test.Unified, edits)
+			}
+		})
+	}
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/difftest/difftest_test.go b/pkg/plugin/processor/builtin/internal/diff/difftest/difftest_test.go
new file mode 100644
index 000000000..5ff4aae05
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/difftest/difftest_test.go
@@ -0,0 +1,82 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package difftest supplies a set of tests that will operate on any
+// implementation of a diff algorithm as exposed by
+// "github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff"
+package difftest_test
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest"
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/testenv"
+)
+
+func TestVerifyUnified(t *testing.T) {
+	testenv.NeedsTool(t, "diff")
+	for _, test := range difftest.TestCases {
+		t.Run(test.Name, func(t *testing.T) {
+			if test.NoDiff {
+				t.Skip("diff tool produces expected different results")
+			}
+			diff, err := getDiffOutput(test.In, test.Out)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if len(diff) > 0 {
+				diff = difftest.UnifiedPrefix + diff
+			}
+			if diff != test.Unified {
+				t.Errorf("unified:\n%s\ndiff -u:\n%s", test.Unified, diff)
+			}
+		})
+	}
+}
+
+func getDiffOutput(a, b string) (string, error) {
+	fileA, err := os.CreateTemp("", "myers.in")
+	if err != nil {
+		return "", err
+	}
+	defer os.Remove(fileA.Name())
+	if _, err := fileA.Write([]byte(a)); err != nil {
+		return "", err
+	}
+	if err := fileA.Close(); err != nil {
+		return "", err
+	}
+	fileB, err := os.CreateTemp("", "myers.in")
+	if err != nil {
+		return "", err
+	}
+	defer os.Remove(fileB.Name())
+	if _, err := fileB.Write([]byte(b)); err != nil {
+		return "", err
+	}
+	if err := fileB.Close(); err != nil {
+		return "", err
+	}
+	cmd := exec.Command("diff", "-u", fileA.Name(), fileB.Name())
+	cmd.Env = append(cmd.Env, "LANG=en_US.UTF-8")
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		if _, ok := err.(*exec.ExitError); !ok {
+			return "", fmt.Errorf("failed to run diff -u %v %v: %v\n%v", fileA.Name(), fileB.Name(), err, string(out))
+		}
+	}
+	diff := string(out)
+	if len(diff) <= 0 {
+		return diff, nil
+	}
+	bits := strings.SplitN(diff, "\n", 3)
+	if len(bits) != 3 {
+		return "", fmt.Errorf("diff output did not have file prefix:\n%s", diff)
+	}
+	return bits[2], nil
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/export_test.go b/pkg/plugin/processor/builtin/internal/diff/export_test.go
new file mode 100644
index 000000000..eedf0dd77
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/export_test.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+// This file exports some private declarations to tests.
+
+var LineEdits = lineEdits
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/common.go b/pkg/plugin/processor/builtin/internal/diff/lcs/common.go
new file mode 100644
index 000000000..c3e82dd26
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/common.go
@@ -0,0 +1,179 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+	"log"
+	"sort"
+)
+
+// lcs is a longest common subsequence, represented as a slice of diags
+type lcs []diag
+
+// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i<Len.
+// All computed diagonals are parts of a longest common subsequence.
+type diag struct {
+	X, Y int
+	Len  int
+}
+
+// sort sorts in place, by lowest X, and if tied, inversely by Len
+func (l lcs) sort() lcs {
+	sort.Slice(l, func(i, j int) bool {
+		if l[i].X != l[j].X {
+			return l[i].X < l[j].X
+		}
+		return l[i].Len > l[j].Len
+	})
+	return l
+}
+
+// valid reports whether the elements of the lcs do not overlap
+// (overlap can only happen when the two-sided algorithm ends early).
+// It expects the lcs to be sorted.
+func (l lcs) valid() bool {
+	for i := 1; i < len(l); i++ {
+		if l[i-1].X+l[i-1].Len > l[i].X {
+			return false
+		}
+		if l[i-1].Y+l[i-1].Len > l[i].Y {
+			return false
+		}
+	}
+	return true
+}
+
+// fix repairs an lcs whose diagonals overlap and returns the repaired, sorted lcs.
+// It is only called if the two-sided algorithm stops early.
+func (l lcs) fix() lcs {
+	// from the set of diagonals in l, find a maximal non-conflicting set
+	// this problem may be NP-complete, but we use a greedy heuristic,
+	// which is quadratic, but with a better data structure, could be D log D.
+	// independent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs
+	// which has to have monotone x and y
+	if len(l) == 0 {
+		return nil
+	}
+	sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len })
+	tmp := make(lcs, 0, len(l))
+	tmp = append(tmp, l[0])
+	for i := 1; i < len(l); i++ {
+		var dir direction
+		nxt := l[i]
+		for _, in := range tmp {
+			if dir, nxt = overlap(in, nxt); dir == empty || dir == bad {
+				break
+			}
+		}
+		if nxt.Len > 0 && dir != bad {
+			tmp = append(tmp, nxt)
+		}
+	}
+	tmp.sort()
+	if false && !tmp.valid() { // debug checking
+		log.Fatalf("here %d", len(tmp))
+	}
+	return tmp
+}
+
+type direction int
+
+const (
+	empty    direction = iota // diag is empty (so not in lcs)
+	leftdown                  // proposed diag is acceptably to the left and below
+	rightup                   // proposed diag is acceptably to the right and above
+	bad                       // proposed diag is inconsistent with the lcs so far
+)
+
+// overlap trims the proposed diag prop so it doesn't overlap with
+// the existing diag that has already been added to the lcs.
+func overlap(exist, prop diag) (direction, diag) {
+	if prop.X <= exist.X && exist.X < prop.X+prop.Len {
+		// remove the end of prop where it overlaps with the X end of exist
+		delta := prop.X + prop.Len - exist.X
+		prop.Len -= delta
+		if prop.Len <= 0 {
+			return empty, prop
+		}
+	}
+	if exist.X <= prop.X && prop.X < exist.X+exist.Len {
+		// remove the beginning of prop where it overlaps with exist
+		delta := exist.X + exist.Len - prop.X
+		prop.Len -= delta
+		if prop.Len <= 0 {
+			return empty, prop
+		}
+		prop.X += delta
+		prop.Y += delta
+	}
+	if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len {
+		// remove the end of prop that overlaps (in Y) with exist
+		delta := prop.Y + prop.Len - exist.Y
+		prop.Len -= delta
+		if prop.Len <= 0 {
+			return empty, prop
+		}
+	}
+	if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len {
+		// remove the beginning of prop that overlaps (in Y) with exist
+		delta := exist.Y + exist.Len - prop.Y
+		prop.Len -= delta
+		if prop.Len <= 0 {
+			return empty, prop
+		}
+		prop.X += delta // no test reaches this code
+		prop.Y += delta
+	}
+	if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y {
+		return leftdown, prop
+	}
+	if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y {
+		return rightup, prop
+	}
+	// prop can't be in an lcs that contains exist
+	return bad, prop
+}
+
+// manipulating Diag and lcs
+
+// prepend adds a diagonal (x,y)-(x+1,y+1) segment to the front of the lcs,
+// extending its first diag when adjacent. prepend is only called to extend
+// diagonals in the backward direction.
+func (lcs lcs) prepend(x, y int) lcs {
+	if len(lcs) > 0 {
+		d := &lcs[0]
+		if int(d.X) == x+1 && int(d.Y) == y+1 {
+			// extend the diagonal down and to the left
+			d.X, d.Y = int(x), int(y)
+			d.Len++
+			return lcs
+		}
+	}
+
+	r := diag{X: int(x), Y: int(y), Len: 1}
+	lcs = append([]diag{r}, lcs...)
+	return lcs
+}
+
+// append appends a diagonal, or extends the last existing one,
+// by adding the edge (x,y)-(x+1,y+1). append is only called
+// to extend diagonals in the forward direction.
+func (lcs lcs) append(x, y int) lcs {
+	if len(lcs) > 0 {
+		last := &lcs[len(lcs)-1]
+		// Expand last element if adjoining.
+		if last.X+last.Len == x && last.Y+last.Len == y {
+			last.Len++
+			return lcs
+		}
+	}
+
+	return append(lcs, diag{X: x, Y: y, Len: 1})
+}
+
+// ok reports whether d and k satisfy the constraint d >= 0 and -d <= k <= d.
+func ok(d, k int) bool {
+	return d >= 0 && -d <= k && k <= d
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/common_test.go b/pkg/plugin/processor/builtin/internal/diff/lcs/common_test.go
new file mode 100644
index 000000000..f19245e40
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/common_test.go
@@ -0,0 +1,140 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+	"log"
+	"math/rand"
+	"strings"
+	"testing"
+)
+
+type Btest struct {
+	a, b string
+	lcs  []string
+}
+
+var Btests = []Btest{
+	{"aaabab", "abaab", []string{"abab", "aaab"}},
+	{"aabbba", "baaba", []string{"aaba"}},
+	{"cabbx", "cbabx", []string{"cabx", "cbbx"}},
+	{"c", "cb", []string{"c"}},
+	{"aaba", "bbb", []string{"b"}},
+	{"bbaabb", "b", []string{"b"}},
+	{"baaabb", "bbaba", []string{"bbb", "baa", "bab"}},
+	{"baaabb", "abbab", []string{"abb", "bab", "aab"}},
+	{"baaba", "aaabba", []string{"aaba"}},
+	{"ca", "cba", []string{"ca"}},
+	{"ccbcbc", "abba", []string{"bb"}},
+	{"ccbcbc", "aabba", []string{"bb"}},
+	{"ccb", "cba", []string{"cb"}},
+	{"caef", "axe", []string{"ae"}},
+	{"bbaabb", "baabb", []string{"baabb"}},
+	// Example from Myers:
+	{"abcabba", "cbabac", []string{"caba", "baba", "cbba"}},
+	{"3456aaa", "aaa", []string{"aaa"}},
+	{"aaa", "aaa123", []string{"aaa"}},
+	{"aabaa", "aacaa", []string{"aaaa"}},
+	{"1a", "a", []string{"a"}},
+	{"abab", "bb", []string{"bb"}},
+	{"123", "ab", []string{""}},
+	{"a", "b", []string{""}},
+	{"abc", "123", []string{""}},
+	{"aa", "aa", []string{"aa"}},
+	{"abcde", "12345", []string{""}},
+	{"aaa3456", "aaa", []string{"aaa"}},
+	{"abcde", "12345a", []string{"a"}},
+	{"ab", "123", []string{""}},
+	{"1a2", "a", []string{"a"}},
+	// for two-sided
+	{"babaab", "cccaba", []string{"aba"}},
+	{"aabbab", "cbcabc", []string{"bab"}},
+	{"abaabb", "bcacab", []string{"baab"}},
+	{"abaabb", "abaaaa", []string{"abaa"}},
+	{"bababb", "baaabb", []string{"baabb"}},
+	{"abbbaa", "cabacc", []string{"aba"}},
+	{"aabbaa", "aacaba", []string{"aaaa", "aaba"}},
+}
+
+func init() {
+	log.SetFlags(log.Lshortfile)
+}
+
+func check(t *testing.T, str string, lcs lcs, want []string) {
+	t.Helper()
+	if !lcs.valid() {
+		t.Errorf("bad lcs %v", lcs)
+	}
+	var got strings.Builder
+	for _, dd := range lcs {
+		got.WriteString(str[dd.X : dd.X+dd.Len])
+	}
+	ans := got.String()
+	for _, w := range want {
+		if ans == w {
+			return
+		}
+	}
+	t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans)
+}
+
+func checkDiffs(t *testing.T, before string, diffs []Diff, after string) {
+	t.Helper()
+	var ans strings.Builder
+	sofar := 0 // index of position in before
+	for _, d := range diffs {
+		if sofar < d.Start {
+			ans.WriteString(before[sofar:d.Start])
+		}
+		ans.WriteString(after[d.ReplStart:d.ReplEnd])
+		sofar = d.End
+	}
+	ans.WriteString(before[sofar:])
+	if ans.String() != after {
+		t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after)
+	}
+}
+
+func lcslen(l lcs) int {
+	ans := 0
+	for _, d := range l {
+		ans += int(d.Len)
+	}
+	return ans
+}
+
+// return a random string of length n made of characters from s
+func randstr(s string, n int) string {
+	src := []rune(s)
+	x := make([]rune, n)
+	for i := 0; i < n; i++ {
+		x[i] = src[rand.Intn(len(src))]
+	}
+	return string(x)
+}
+
+func TestLcsFix(t *testing.T) {
+	tests := []struct{ before, after lcs }{
+		{lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}},
+		{lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}},
+		{lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}},
+		{lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}},
+		{lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}},
+		{lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}},
+		{lcs{}, lcs{}},
+		{lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}},
+	}
+	for n, x := range tests {
+		got := x.before.fix()
+		if len(got) != len(x.after) {
+			t.Errorf("got %v, expected %v, for %v", got, x.after, x.before)
+		}
+		olen := lcslen(x.after)
+		glen := lcslen(got)
+		if olen != glen {
+			t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before)
+		}
+	}
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/doc.go b/pkg/plugin/processor/builtin/internal/diff/lcs/doc.go
new file mode 100644
index 000000000..9029dd20b
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/doc.go
@@ -0,0 +1,156 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lcs contains code to find longest-common-subsequences
+// (and diffs)
+package lcs
+
+/*
+Compute longest-common-subsequences of two slices A, B using
+algorithms from Myers' paper. A longest-common-subsequence
+(LCS from now on) of A and B is a maximal set of lexically increasing
+pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but
+they all have the same length. An LCS determines a sequence of edits
+that changes A into B.
+
+The key concept is the edit graph of A and B.
+If A has length N and B has length M, then the edit graph has
+vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a
+horizontal edge from v[i][j] to v[i+1][j] whenever both are in
+the graph, and a vertical edge from v[i][j] to v[i][j+1] similarly.
+When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1].
+
+A path in the graph between (0,0) and (N,M) determines a sequence
+of edits converting A into B: each horizontal edge corresponds to removing
+an element of A, and each vertical edge corresponds to inserting an
+element of B.
+
+A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph
+is of length D if it has D non-diagonal edges. The algorithms generate
+forward paths (in which at least one of x,y increases at each edge),
+or backward paths (in which at least one of x,y decreases at each edge),
+or a combination. (Note that the orientation is the traditional mathematical one,
+with the origin in the lower-left corner.)
+
+Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.)
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   b      |             |             |   ___/‾‾‾   |   ___/‾‾‾   |             |             |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   c      |             |             |             |             |             |             |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+                 a             a             b             b             a             a
+
+
+The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at
+the end of a maximal path of length D. (Because x-y=k it suffices to remember
+only the x coordinate of the vertex.)
+
+The forward algorithm: Find the longest diagonal starting at (0,0) and
+label its end with D=0,k=0. From that vertex take a vertical step and
+then follow the longest diagonal (up and to the right), and label that vertex
+with D=1,k=-1. From the D=0,k=0 point take a horizontal step and then follow
+the longest diagonal (up and to the right) and label that vertex
+D=1,k=1. In the same way, having labelled all the D vertices,
+from a vertex labelled D,k find two vertices
+tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same
+diagonal, in which case take the one with the larger x.
+
+Eventually the path gets to (N,M), and the diagonals on it are the LCS.
+
+Here is the edit graph with the ends of D-paths labelled. (So, for instance,
+0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first
+step is to go up the longest diagonal from (0,0).)
+A:"aabbaa", B:"aacaba"
+          ⊙   -------   ⊙   -------   ⊙   -------(3/3,6)-------   ⊙   -------(3/5,6)-------(4/6,6)
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------(2/3,5)-------   ⊙   -------   ⊙   -------   ⊙
+   b      |             |             |   ___/‾‾‾   |   ___/‾‾‾   |             |             |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------(3/5,4)-------   ⊙
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------(1/2,3)-------(2/3,3)-------   ⊙   -------   ⊙   -------   ⊙
+   c      |             |             |             |             |             |             |
+          ⊙   -------   ⊙   -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2)
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+   a      |   ___/‾‾‾   |   ___/‾‾‾   |             |             |   ___/‾‾‾   |   ___/‾‾‾   |
+          ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙   -------   ⊙
+                 a             a             b             b             a             a
+
+The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical
+to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected,
+there are 4 non-diagonal steps, and the diagonals form an LCS.
+
+There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon):
+A:"aabbaa", B:"aacaba"
+            ⊙   --------    ⊙   --------    ⊙   --------    ⊙   --------    ⊙   --------    ⊙   --------    ⊙
+    a       |   ____/‾‾‾    |   ____/‾‾‾    |               |               |   ____/‾‾‾    |   ____/‾‾‾    |
+            ⊙   --------    ⊙   --------    ⊙   --------    ⊙   --------    ⊙   --------(:0/5,5)--------    ⊙
+    b       |               |               |   ____/‾‾‾    |   ____/‾‾‾    |               |               |
+            ⊙   --------    ⊙   --------    ⊙   --------(:1/3,4)--------    ⊙   --------    ⊙   --------    ⊙
+    a       |   ____/‾‾‾    |   ____/‾‾‾    |               |               |   ____/‾‾‾    |   ____/‾‾‾    |
+        (:3/0,3)--------(:2/1,3)--------    ⊙   --------(:2/3,3)--------(:1/4,3)--------    ⊙   --------    ⊙
+    c       |               |               |               |               |               |               |
+            ⊙   --------    ⊙   --------    ⊙   --------(:3/3,2)--------(:2/4,2)--------    ⊙   --------    ⊙
+    a       |   ____/‾‾‾    |   ____/‾‾‾    |               |               |   ____/‾‾‾    |   ____/‾‾‾    |
+        (:3/0,1)--------    ⊙   --------    ⊙   --------    ⊙   --------(:3/4,1)--------    ⊙   --------    ⊙
+    a       |   ____/‾‾‾    |   ____/‾‾‾    |               |               |   ____/‾‾‾    |   ____/‾‾‾    |
+        (:4/0,0)--------    ⊙   --------    ⊙   --------    ⊙   --------(:4/4,0)--------    ⊙   --------    ⊙
+                    a               a               b               b               a               a
+
+Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the
+front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short.
+We want to control how big D can be, by stopping when it gets too large. The forward algorithm then
+privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable
+asymmetry.
+
+Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in
+the edit graph look like.
+A:"aabbaa", B:"aacaba"
+             ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙
+    a        |    ____/‾‾‾‾    |    ____/‾‾‾‾    |                 |                 |    ____/‾‾‾‾    |    ____/‾‾‾‾    |
+             ⊙    ---------    ⊙    ---------    ⊙    --------- (2/3,5) ---------    ⊙    --------- (:0/5,5)---------    ⊙
+    b        |                 |                 |    ____/‾‾‾‾    |    ____/‾‾‾‾    |                 |                 |
+             ⊙    ---------    ⊙    ---------    ⊙    --------- (:1/3,4)---------    ⊙    ---------    ⊙    ---------    ⊙
+    a        |    ____/‾‾‾‾    |    ____/‾‾‾‾    |                 |                 |    ____/‾‾‾‾    |    ____/‾‾‾‾    |
+             ⊙    --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)---------    ⊙    ---------    ⊙
+    c        |                 |                 |                 |                 |                 |                 |
+             ⊙    ---------    ⊙    --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)---------    ⊙    ---------    ⊙
+    a        |    ____/‾‾‾‾    |    ____/‾‾‾‾    |                 |                 |    ____/‾‾‾‾    |    ____/‾‾‾‾    |
+             ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙
+    a        |    ____/‾‾‾‾    |    ____/‾‾‾‾    |                 |                 |    ____/‾‾‾‾    |    ____/‾‾‾‾    |
+             ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙    ---------    ⊙
+                      a                 a                 b                 b                 a                 a
+
+The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion
+is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same
+diagonal. (Here the edit graph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward
+2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path.
+Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the
+computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed
+from the edit graph. The implementation looks for a number of special cases to try to avoid computing an extra forward path.
+
+If the two-sided algorithm has to stop early (because D has become too large) it will have found a forward LCS and a
+backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two
+computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS
+is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two
+to form a best-effort LCS. In the worst case the forward partial LCS may have to
+be recomputed.
+*/
+
+/* Eugene Myers' paper is titled
+"An O(ND) Difference Algorithm and Its Variations"
+and can be found at
+http://www.xmailserver.org/diff2.pdf
+
+(There is a generic implementation of the algorithm in the repository with git hash
+b9ad7e4ade3a686d608e44475390ad428e60e7fc)
+*/
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/git.sh b/pkg/plugin/processor/builtin/internal/diff/lcs/git.sh
new file mode 100644
index 000000000..b25ba4aac
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/git.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Copyright 2022 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+#
+# Creates a zip file containing all numbered versions
+# of the commit history of a large source file, for use
+# as input data for the tests of the diff algorithm.
+#
+# Run script from root of the x/tools repo.
+
+set -eu
+
+# WARNING: This script will install the latest version of $file
+# The largest real source file in the x/tools repo.
+# file=internal/golang/completion/completion.go
+# file=internal/golang/diagnostics.go
+file=internal/protocol/tsprotocol.go
+
+tmp=$(mktemp -d)
+git log $file |
+  awk '/^commit / {print $2}' |
+  nl -ba -nrz |
+  while read n hash; do
+    git checkout --quiet $hash $file
+    cp -f $file $tmp/$n
+  done
+(cd $tmp && zip -q - *) > testdata.zip
+rm -fr $tmp
+git restore --staged $file
+git restore $file
+echo "Created testdata.zip"
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/labels.go b/pkg/plugin/processor/builtin/internal/diff/lcs/labels.go
new file mode 100644
index 000000000..504913d1d
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/labels.go
@@ -0,0 +1,55 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+	"fmt"
+)
+
+// For each D, vec[D] has length D+1,
+// and the label for (D, k) is stored in vec[D][(D+k)/2].
+type label struct {
+	vec [][]int
+}
+
+// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE
+const debug = false
+
+// debugging. check that the (d,k) pair is valid
+// (that is, -d<=k<=d and d+k even)
+func checkDK(D, k int) {
+	if k >= -D && k <= D && (D+k)%2 == 0 {
+		return
+	}
+	panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k))
+}
+
+func (t *label) set(D, k, x int) {
+	if debug {
+		checkDK(D, k)
+	}
+	for len(t.vec) <= D {
+		t.vec = append(t.vec, nil)
+	}
+	if t.vec[D] == nil {
+		t.vec[D] = make([]int, D+1)
+	}
+	t.vec[D][(D+k)/2] = x // known that D+k is even
+}
+
+func (t *label) get(d, k int) int {
+	if debug {
+		checkDK(d, k)
+	}
+	return int(t.vec[d][(d+k)/2])
+}
+
+func newtriang(limit int) label {
+	if limit < 100 {
+		// Preallocate if limit is not large.
+		return label{vec: make([][]int, limit)}
+	}
+	return label{}
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/old.go b/pkg/plugin/processor/builtin/internal/diff/lcs/old.go
new file mode 100644
index 000000000..a14ae9119
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/old.go
@@ -0,0 +1,480 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+// TODO(adonovan): remove unclear references to "old" in this package.
+
+import (
+	"fmt"
+)
+
+// A Diff is a replacement of a portion of A by a portion of B.
+type Diff struct {
+	Start, End         int // offsets of portion to delete in A
+	ReplStart, ReplEnd int // offset of replacement text in B
+}
+
+// DiffStrings returns the differences between two strings.
+// It does not respect rune boundaries.
+func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) }
+
+// DiffBytes returns the differences between two byte sequences.
+// It does not respect rune boundaries.
+func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
+
+// DiffRunes returns the differences between two rune sequences.
+func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
+
+func diff(seqs sequences) []Diff {
+	// A limit on how deeply the LCS algorithm should search. The value is just a guess.
+	const maxDiffs = 100
+	diff, _ := compute(seqs, twosided, maxDiffs/2)
+	return diff
+}
+
+// compute computes the list of differences between two sequences,
+// along with the LCS. It is exercised directly by tests.
+// The algorithm is one of {forward, backward, twosided}.
+func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
+	if limit <= 0 {
+		limit = 1 << 25 // effectively infinity
+	}
+	alen, blen := seqs.lengths()
+	g := &editGraph{
+		seqs:  seqs,
+		vf:    newtriang(limit),
+		vb:    newtriang(limit),
+		limit: limit,
+		ux:    alen,
+		uy:    blen,
+		delta: alen - blen,
+	}
+	lcs := algo(g)
+	diffs := lcs.toDiffs(alen, blen)
+	return diffs, lcs
+}
+
+// editGraph carries the information for computing the lcs of two sequences.
+type editGraph struct {
+	seqs   sequences
+	vf, vb label // forward and backward labels
+
+	limit int // maximal value of D
+	// the bounding rectangle of the current edit graph
+	lx, ly, ux, uy int
+	delta          int // common subexpression: (ux-lx)-(uy-ly)
+}
+
+// toDiffs converts an LCS to a list of edits.
+func (lcs lcs) toDiffs(alen, blen int) []Diff {
+	var diffs []Diff
+	var pa, pb int // offsets in a, b
+	for _, l := range lcs {
+		if pa < l.X || pb < l.Y {
+			diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
+		}
+		pa = l.X + l.Len
+		pb = l.Y + l.Len
+	}
+	if pa < alen || pb < blen {
+		diffs = append(diffs, Diff{pa, alen, pb, blen})
+	}
+	return diffs
+}
+
+// --- FORWARD ---
+
+// fdone decides if the forward path has reached the upper right
+// corner of the rectangle. If so, it also returns the computed lcs.
+func (e *editGraph) fdone(D, k int) (bool, lcs) {
+	// x, y, k are relative to the rectangle
+	x := e.vf.get(D, k)
+	y := x - k
+	if x == e.ux && y == e.uy {
+		return true, e.forwardlcs(D, k)
+	}
+	return false, nil
+}
+
+// run the forward algorithm, until success or up to the limit on D.
+func forward(e *editGraph) lcs {
+	e.setForward(0, 0, e.lx)
+	if ok, ans := e.fdone(0, 0); ok {
+		return ans
+	}
+	// from D to D+1
+	for D := 0; D < e.limit; D++ {
+		e.setForward(D+1, -(D + 1), e.getForward(D, -D))
+		if ok, ans := e.fdone(D+1, -(D + 1)); ok {
+			return ans
+		}
+		e.setForward(D+1, D+1, e.getForward(D, D)+1)
+		if ok, ans := e.fdone(D+1, D+1); ok {
+			return ans
+		}
+		for k := -D + 1; k <= D-1; k += 2 {
+			// these are tricky and easy to get backwards
+			lookv := e.lookForward(k, e.getForward(D, k-1)+1)
+			lookh := e.lookForward(k, e.getForward(D, k+1))
+			if lookv > lookh {
+				e.setForward(D+1, k, lookv)
+			} else {
+				e.setForward(D+1, k, lookh)
+			}
+			if ok, ans := e.fdone(D+1, k); ok {
+				return ans
+			}
+		}
+	}
+	// D is too large
+	// find the D path with maximal x+y inside the rectangle and
+	// use that to compute the found part of the lcs
+	kmax := -e.limit - 1
+	diagmax := -1
+	for k := -e.limit; k <= e.limit; k += 2 {
+		x := e.getForward(e.limit, k)
+		y := x - k
+		if x+y > diagmax && x <= e.ux && y <= e.uy {
+			diagmax, kmax = x+y, k
+		}
+	}
+	return e.forwardlcs(e.limit, kmax)
+}
+
+// recover the lcs by backtracking from the farthest point reached
+func (e *editGraph) forwardlcs(D, k int) lcs {
+	var ans lcs
+	for x := e.getForward(D, k); x != 0 || x-k != 0; {
+		if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
+			// if (x-1,y) is labelled D-1, x--,D--,k--,continue
+			D, k, x = D-1, k-1, x-1
+			continue
+		} else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
+			// if (x,y-1) is labelled D-1, x, D--,k++, continue
+			D, k = D-1, k+1
+			continue
+		}
+		// if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
+		y := x - k
+		ans = ans.prepend(x+e.lx-1, y+e.ly-1)
+		x--
+	}
+	return ans
+}
+
+// start at (x,y), go up the diagonal as far as possible,
+// and return the x reached (setForward does the labelling)
+func (e *editGraph) lookForward(k, relx int) int {
+	rely := relx - k
+	x, y := relx+e.lx, rely+e.ly
+	if x < e.ux && y < e.uy {
+		x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
+	}
+	return x
+}
+
+func (e *editGraph) setForward(d, k, relx int) {
+	x := e.lookForward(k, relx)
+	e.vf.set(d, k, x-e.lx)
+}
+
+func (e *editGraph) getForward(d, k int) int {
+	x := e.vf.get(d, k)
+	return x
+}
+
+// --- BACKWARD ---
+
+// bdone decides if the backward path has reached the lower left corner
+func (e *editGraph) bdone(D, k int) (bool, lcs) {
+	// x, y, k are relative to the rectangle
+	x := e.vb.get(D, k)
+	y := x - (k + e.delta)
+	if x == 0 && y == 0 {
+		return true, e.backwardlcs(D, k)
+	}
+	return false, nil
+}
+
+// run the backward algorithm, until success or up to the limit on D.
+func backward(e *editGraph) lcs {
+	e.setBackward(0, 0, e.ux)
+	if ok, ans := e.bdone(0, 0); ok {
+		return ans
+	}
+	// from D to D+1
+	for D := 0; D < e.limit; D++ {
+		e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
+		if ok, ans := e.bdone(D+1, -(D + 1)); ok {
+			return ans
+		}
+		e.setBackward(D+1, D+1, e.getBackward(D, D))
+		if ok, ans := e.bdone(D+1, D+1); ok {
+			return ans
+		}
+		for k := -D + 1; k <= D-1; k += 2 {
+			// these are tricky and easy to get wrong
+			lookv := e.lookBackward(k, e.getBackward(D, k-1))
+			lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
+			if lookv < lookh {
+				e.setBackward(D+1, k, lookv)
+			} else {
+				e.setBackward(D+1, k, lookh)
+			}
+			if ok, ans := e.bdone(D+1, k); ok {
+				return ans
+			}
+		}
+	}
+
+	// D is too large
+	// find the D path with minimal x+y inside the rectangle and
+	// use that to compute the part of the lcs found
+	kmax := -e.limit - 1
+	diagmin := 1 << 25
+	for k := -e.limit; k <= e.limit; k += 2 {
+		x := e.getBackward(e.limit, k)
+		y := x - (k + e.delta)
+		if x+y < diagmin && x >= 0 && y >= 0 {
+			diagmin, kmax = x+y, k
+		}
+	}
+	if kmax < -e.limit {
+		panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
+	}
+	return e.backwardlcs(e.limit, kmax)
+}
+
+// recover the lcs by backtracking
+func (e *editGraph) backwardlcs(D, k int) lcs {
+	var ans lcs
+	for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
+		if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
+			// D--, k--, x unchanged
+			D, k = D-1, k-1
+			continue
+		} else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
+			// D--, k++, x++
+			D, k, x = D-1, k+1, x+1
+			continue
+		}
+		y := x - (k + e.delta)
+		ans = ans.append(x+e.lx, y+e.ly)
+		x++
+	}
+	return ans
+}
+
+// start at (x,y), go down the diagonal as far as possible, and return the x reached
+func (e *editGraph) lookBackward(k, relx int) int {
+	rely := relx - (k + e.delta) // forward k = k + e.delta
+	x, y := relx+e.lx, rely+e.ly
+	if x > 0 && y > 0 {
+		x -= e.seqs.commonSuffixLen(0, x, 0, y)
+	}
+	return x
+}
+
+// convert to rectangle, and label the result with d
+func (e *editGraph) setBackward(d, k, relx int) {
+	x := e.lookBackward(k, relx)
+	e.vb.set(d, k, x-e.lx)
+}
+
+func (e *editGraph) getBackward(d, k int) int {
+	x := e.vb.get(d, k)
+	return x
+}
+
+// --- TWOSIDED ---
+
+func twosided(e *editGraph) lcs {
+	// The termination condition could be improved, as either the forward
+	// or backward pass could succeed before Myers' Lemma applies.
+	// Aside from questions of efficiency (is the extra testing cost-effective)
+	// this is more likely to matter when e.limit is reached.
+	e.setForward(0, 0, e.lx)
+	e.setBackward(0, 0, e.ux)
+
+	// from D to D+1
+	for D := 0; D < e.limit; D++ {
+		// just finished a backwards pass, so check
+		if got, ok := e.twoDone(D, D); ok {
+			return e.twolcs(D, D, got)
+		}
+		// do a forwards pass (D to D+1)
+		e.setForward(D+1, -(D + 1), e.getForward(D, -D))
+		e.setForward(D+1, D+1, e.getForward(D, D)+1)
+		for k := -D + 1; k <= D-1; k += 2 {
+			// these are tricky and easy to get backwards
+			lookv := e.lookForward(k, e.getForward(D, k-1)+1)
+			lookh := e.lookForward(k, e.getForward(D, k+1))
+			if lookv > lookh {
+				e.setForward(D+1, k, lookv)
+			} else {
+				e.setForward(D+1, k, lookh)
+			}
+		}
+		// just did a forward pass, so check
+		if got, ok := e.twoDone(D+1, D); ok {
+			return e.twolcs(D+1, D, got)
+		}
+		// do a backward pass, D to D+1
+		e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
+		e.setBackward(D+1, D+1, e.getBackward(D, D))
+		for k := -D + 1; k <= D-1; k += 2 {
+			// these are tricky and easy to get wrong
+			lookv := e.lookBackward(k, e.getBackward(D, k-1))
+			lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
+			if lookv < lookh {
+				e.setBackward(D+1, k, lookv)
+			} else {
+				e.setBackward(D+1, k, lookh)
+			}
+		}
+	}
+
+	// D too large. combine a forward and backward partial lcs
+	// first, a forward one
+	kmax := -e.limit - 1
+	diagmax := -1
+	for k := -e.limit; k <= e.limit; k += 2 {
+		x := e.getForward(e.limit, k)
+		y := x - k
+		if x+y > diagmax && x <= e.ux && y <= e.uy {
+			diagmax, kmax = x+y, k
+		}
+	}
+	if kmax < -e.limit {
+		panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
+	}
+	lcs := e.forwardlcs(e.limit, kmax)
+	// now a backward one
+	// find the D path with minimal x+y inside the rectangle and
+	// use that to compute the lcs
+	diagmin := 1 << 25 // infinity
+	for k := -e.limit; k <= e.limit; k += 2 {
+		x := e.getBackward(e.limit, k)
+		y := x - (k + e.delta)
+		if x+y < diagmin && x >= 0 && y >= 0 {
+			diagmin, kmax = x+y, k
+		}
+	}
+	if kmax < -e.limit {
+		panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
+	}
+	lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
+	// These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
+	ans := lcs.fix()
+	return ans
+}
+
+// Does Myers' Lemma apply?
+func (e *editGraph) twoDone(df, db int) (int, bool) {
+	if (df+db+e.delta)%2 != 0 {
+		return 0, false // diagonals cannot overlap
+	}
+	kmin := -db + e.delta
+	if -df > kmin {
+		kmin = -df
+	}
+	kmax := db + e.delta
+	if df < kmax {
+		kmax = df
+	}
+	for k := kmin; k <= kmax; k += 2 {
+		x := e.vf.get(df, k)
+		u := e.vb.get(db, k-e.delta)
+		if u <= x {
+			// is it worth looking at all the other k?
+			for l := k; l <= kmax; l += 2 {
+				x := e.vf.get(df, l)
+				y := x - l
+				u := e.vb.get(db, l-e.delta)
+				v := u - l
+				if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
+					return l, true
+				}
+			}
+			return k, true
+		}
+	}
+	return 0, false
+}
+
+func (e *editGraph) twolcs(df, db, kf int) lcs {
+	// db==df || db+1==df
+	x := e.vf.get(df, kf)
+	y := x - kf
+	kb := kf - e.delta
+	u := e.vb.get(db, kb)
+	v := u - kf
+
+	// Myers proved there is a df-path from (0,0) to (u,v)
+	// and a db-path from (x,y) to (N,M).
+	// In the first case the overall path is the forward path
+	// to (u,v) followed by the backward path to (N,M).
+	// In the second case the path is the backward path to (x,y)
+	// followed by the forward path to (x,y) from (0,0).
+
+	// Look for some special cases to avoid computing either of these paths.
+	if x == u {
+		// "babaab" "cccaba"
+		// already patched together
+		lcs := e.forwardlcs(df, kf)
+		lcs = append(lcs, e.backwardlcs(db, kb)...)
+		return lcs.sort()
+	}
+
+	// is (u-1,v) or (u,v-1) labelled df-1?
+	// if so, that forward df-1-path plus a horizontal or vertical edge
+	// is the df-path to (u,v), then plus the db-path to (N,M)
+	if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
+		//  "aabbab" "cbcabc"
+		lcs := e.forwardlcs(df-1, u-1-v)
+		lcs = append(lcs, e.backwardlcs(db, kb)...)
+		return lcs.sort()
+	}
+	if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
+		//  "abaabb" "bcacab"
+		lcs := e.forwardlcs(df-1, u-(v-1))
+		lcs = append(lcs, e.backwardlcs(db, kb)...)
+		return lcs.sort()
+	}
+
+	// The path can't possibly contribute to the lcs because it
+	// is all horizontal or vertical edges
+	if u == 0 || v == 0 || x == e.ux || y == e.uy {
+		// "abaabb" "abaaaa"
+		if u == 0 || v == 0 {
+			return e.backwardlcs(db, kb)
+		}
+		return e.forwardlcs(df, kf)
+	}
+
+	// is (x+1,y) or (x,y+1) labelled db-1?
+	if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
+		// "bababb" "baaabb"
+		lcs := e.backwardlcs(db-1, kb+1)
+		lcs = append(lcs, e.forwardlcs(df, kf)...)
+		return lcs.sort()
+	}
+	if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
+		// "abbbaa" "cabacc"
+		lcs := e.backwardlcs(db-1, kb-1)
+		lcs = append(lcs, e.forwardlcs(df, kf)...)
+		return lcs.sort()
+	}
+
+	// need to compute another path
+	// "aabbaa" "aacaba"
+	lcs := e.backwardlcs(db, kb)
+	oldx, oldy := e.ux, e.uy
+	e.ux = u
+	e.uy = v
+	lcs = append(lcs, forward(e)...)
+	e.ux, e.uy = oldx, oldy
+	return lcs.sort()
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/old_test.go b/pkg/plugin/processor/builtin/internal/diff/lcs/old_test.go
new file mode 100644
index 000000000..789e9bc6c
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/old_test.go
@@ -0,0 +1,251 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+	"fmt"
+	"log"
+	"math/rand"
+	"os"
+	"strings"
+	"testing"
+)
+
+func TestAlgosOld(t *testing.T) {
+	for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} {
+		t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) {
+			for _, tx := range Btests {
+				lim := len(tx.a) + len(tx.b)
+
+				diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim)
+				check(t, tx.a, lcs, tx.lcs)
+				checkDiffs(t, tx.a, diffs, tx.b)
+
+				diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim)
+				check(t, tx.b, lcs, tx.lcs)
+				checkDiffs(t, tx.b, diffs, tx.a)
+			}
+		})
+	}
+}
+
+func TestIntOld(t *testing.T) {
+	// need to avoid any characters in btests
+	lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB"
+	for _, tx := range Btests {
+		if len(tx.a) < 2 || len(tx.b) < 2 {
+			continue
+		}
+		left := tx.a + lfill
+		right := tx.b + rfill
+		lim := len(tx.a) + len(tx.b)
+		diffs, lcs := compute(stringSeqs{left, right}, twosided, lim)
+		check(t, left, lcs, tx.lcs)
+		checkDiffs(t, left, diffs, right)
+		diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
+		check(t, right, lcs, tx.lcs)
+		checkDiffs(t, right, diffs, left)
+
+		left = lfill + tx.a
+		right = rfill + tx.b
+		diffs, lcs = compute(stringSeqs{left, right}, twosided, lim)
+		check(t, left, lcs, tx.lcs)
+		checkDiffs(t, left, diffs, right)
+		diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
+		check(t, right, lcs, tx.lcs)
+		checkDiffs(t, right, diffs, left)
+	}
+}
+
+func TestSpecialOld(t *testing.T) { // exercises lcs.fix
+	a := "golang.org/x/tools/intern"
+	b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern"
+	diffs, lcs := compute(stringSeqs{a, b}, twosided, 4)
+	if !lcs.valid() {
+		t.Errorf("%d,%v", len(diffs), lcs)
+	}
+}
+
+func TestRegressionOld001(t *testing.T) {
+	a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+
+	b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+	for i := 1; i < len(b); i++ {
+		diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls
+		if !lcs.valid() {
+			t.Errorf("%d,%v", len(diffs), lcs)
+		}
+		checkDiffs(t, a, diffs, b)
+	}
+}
+
+func TestRegressionOld002(t *testing.T) {
+	a := "n\"\n)\n"
+	b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
+	for i := 1; i <= len(b); i++ {
+		diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
+		if !lcs.valid() {
+			t.Errorf("%d,%v", len(diffs), lcs)
+		}
+		checkDiffs(t, a, diffs, b)
+	}
+}
+
+func TestRegressionOld003(t *testing.T) {
+	a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1"
+	b := "golang.org/x/hello v1"
+	for i := 1; i <= len(a); i++ {
+		diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
+		if !lcs.valid() {
+			t.Errorf("%d,%v", len(diffs), lcs)
+		}
+		checkDiffs(t, a, diffs, b)
+	}
+}
+
+func TestRandOld(t *testing.T) {
+	rand.Seed(1)
+	for i := 0; i < 1000; i++ {
+		// TODO(adonovan): use ASCII and bytesSeqs here? The use of
+		// non-ASCII isn't relevant to the property exercised by the test.
+		a := []rune(randstr("abω", 16))
+		b := []rune(randstr("abωc", 16))
+		seq := runesSeqs{a, b}
+
+		const lim = 24 // large enough to get true lcs
+		_, forw := compute(seq, forward, lim)
+		_, back := compute(seq, backward, lim)
+		_, two := compute(seq, twosided, lim)
+		if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) {
+			t.Logf("\n%v\n%v\n%v", forw, back, two)
+			t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two))
+		}
+		if !two.valid() || !forw.valid() || !back.valid() {
+			t.Errorf("check failure")
+		}
+	}
+}
+
+// TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes})
+// to ensure at least minimal parity of the three representations.
+func TestDiffAPI(t *testing.T) {
+	for _, test := range []struct {
+		a, b                              string
+		wantStrings, wantBytes, wantRunes string
+	}{
+		{"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII
+		{"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII
+	} {
+
+		gotStrings := fmt.Sprint(DiffStrings(test.a, test.b))
+		if gotStrings != test.wantStrings {
+			t.Errorf("DiffStrings(%q, %q) = %v, want %v",
+				test.a, test.b, gotStrings, test.wantStrings)
+		}
+		gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b)))
+		if gotBytes != test.wantBytes {
+			t.Errorf("DiffBytes(%q, %q) = %v, want %v",
+				test.a, test.b, gotBytes, test.wantBytes)
+		}
+		gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b)))
+		if gotRunes != test.wantRunes {
+			t.Errorf("DiffRunes(%q, %q) = %v, want %v",
+				test.a, test.b, gotRunes, test.wantRunes)
+		}
+	}
+}
+
+func BenchmarkTwoOld(b *testing.B) {
+	tests := genBench("abc", 96)
+	for i := 0; i < b.N; i++ {
+		for _, tt := range tests {
+			_, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100)
+			if !two.valid() {
+				b.Error("check failed")
+			}
+		}
+	}
+}
+
+func BenchmarkForwOld(b *testing.B) {
+	tests := genBench("abc", 96)
+	for i := 0; i < b.N; i++ {
+		for _, tt := range tests {
+			_, two := compute(stringSeqs{tt.before, tt.after}, forward, 100)
+			if !two.valid() {
+				b.Error("check failed")
+			}
+		}
+	}
+}
+
+func genBench(set string, n int) []struct{ before, after string } {
+	// Builds before/after pairs for benchmarks: 24 strings of length n, where
+	// after differs from before in at least one position (about 5% on average).
+	rand.Seed(3)
+	var ans []struct{ before, after string }
+	for i := 0; i < 24; i++ {
+		// maybe b should have an approximately known number of diffs
+		a := randstr(set, n)
+		cnt := 0
+		bb := make([]rune, 0, n)
+		for _, r := range a {
+			if rand.Float64() < .05 {
+				cnt++
+				r = 'N'
+			}
+			bb = append(bb, r)
+		}
+		if cnt == 0 {
+			// avoid == shortcut
+			bb[n/2] = 'N'
+		}
+		ans = append(ans, struct{ before, after string }{a, string(bb)})
+	}
+	return ans
+}
+
+// This benchmark represents a common case for a diff command:
+// large file with a single relatively small diff in the middle.
+// (It's not clear whether this is representative of gopls workloads
+// or whether it is important to gopls diff performance.)
+//
+// TODO(adonovan) opt: it could be much faster.  For example,
+// comparing a file against itself is about 10x faster than with the
+// small deletion in the middle. Strangely, comparing a file against
+// itself minus the last byte is faster still; I don't know why.
+// There is much low-hanging fruit here for further improvement.
+func BenchmarkLargeFileSmallDiff(b *testing.B) {
+	data, err := os.ReadFile("old.go") // large file
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	n := len(data)
+
+	src := string(data)
+	dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle
+	b.Run("string", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			compute(stringSeqs{src, dst}, twosided, len(src)+len(dst))
+		}
+	})
+
+	srcBytes := []byte(src)
+	dstBytes := []byte(dst)
+	b.Run("bytes", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes))
+		}
+	})
+
+	srcRunes := []rune(src)
+	dstRunes := []rune(dst)
+	b.Run("runes", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes))
+		}
+	})
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/lcs/sequence.go b/pkg/plugin/processor/builtin/internal/diff/lcs/sequence.go
new file mode 100644
index 000000000..2d72d2630
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/lcs/sequence.go
@@ -0,0 +1,113 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+// This file defines the abstract sequence over which the LCS algorithm operates.
+
+// sequences abstracts a pair of sequences, A and B.
+type sequences interface {
+	lengths() (int, int)                    // len(A), len(B)
+	commonPrefixLen(ai, aj, bi, bj int) int // len(commonPrefix(A[ai:aj], B[bi:bj]))
+	commonSuffixLen(ai, aj, bi, bj int) int // len(commonSuffix(A[ai:aj], B[bi:bj]))
+}
+
+type stringSeqs struct{ a, b string }
+
+func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+	return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj])
+}
+func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+	return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj])
+}
+
+// The explicit capacity in s[i:j:j] leads to more efficient code.
+
+type bytesSeqs struct{ a, b []byte }
+
+func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+	return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+	return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+
+type runesSeqs struct{ a, b []rune }
+
+func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+	return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+	return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+
+// TODO(adonovan): optimize these functions using ideas from:
+// - https://go.dev/cl/408116 common.go
+// - https://go.dev/cl/421435 xor_generic.go
+
+// TODO(adonovan): factor using generics when available,
+// but measure performance impact.
+
+// commonPrefixLen* returns the length of the common prefix of a and b.
+func commonPrefixLenBytes(a, b []byte) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[i] == b[i] {
+		i++
+	}
+	return i
+}
+func commonPrefixLenRunes(a, b []rune) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[i] == b[i] {
+		i++
+	}
+	return i
+}
+func commonPrefixLenString(a, b string) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[i] == b[i] {
+		i++
+	}
+	return i
+}
+
+// commonSuffixLen* returns the length of the common suffix of a and b.
+func commonSuffixLenBytes(a, b []byte) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
+		i++
+	}
+	return i
+}
+func commonSuffixLenRunes(a, b []rune) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
+		i++
+	}
+	return i
+}
+func commonSuffixLenString(a, b string) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
+		i++
+	}
+	return i
+}
+
+// min returns the smaller of x and y.
+func min(x, y int) int {
+	if x < y {
+		return x
+	}
+	return y
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/myers/diff.go b/pkg/plugin/processor/builtin/internal/diff/myers/diff.go
new file mode 100644
index 000000000..d2b8d1ee6
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/myers/diff.go
@@ -0,0 +1,246 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package myers implements the Myers diff algorithm.
+package myers
+
+import (
+	"strings"
+
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff"
+)
+
+// Sources:
+// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
+// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
+
+// ComputeEdits returns the diffs of two strings using a simple
+// line-based implementation, like [diff.Strings].
+//
+// Deprecated: this implementation is moribund. However, when diffs
+// appear in marker test expectations, they are the particular diffs
+// produced by this implementation. The marker test framework
+// asserts diff(orig, got)==wantDiff, but ideally it would compute
+// got==apply(orig, wantDiff) so that the notation of the diff
+// is immaterial.
+func ComputeEdits(before, after string) []diff.Edit {
+	beforeLines := splitLines(before)
+	ops := operations(beforeLines, splitLines(after))
+
+	// Build a table mapping line number to offset.
+	lineOffsets := make([]int, 0, len(beforeLines)+1)
+	total := 0
+	for i := range beforeLines {
+		lineOffsets = append(lineOffsets, total)
+		total += len(beforeLines[i])
+	}
+	lineOffsets = append(lineOffsets, total) // EOF
+
+	edits := make([]diff.Edit, 0, len(ops))
+	for _, op := range ops {
+		start, end := lineOffsets[op.I1], lineOffsets[op.I2]
+		switch op.Kind {
+		case opDelete:
+			// Delete: before[I1:I2] is deleted.
+			edits = append(edits, diff.Edit{Start: start, End: end})
+		case opInsert:
+			// Insert: after[J1:J2] is inserted at before[I1:I1].
+			if content := strings.Join(op.Content, ""); content != "" {
+				edits = append(edits, diff.Edit{Start: start, End: end, New: content})
+			}
+		}
+	}
+	return edits
+}
+
+// opKind is used to denote the type of operation a line represents.
+type opKind int
+
+const (
+	opDelete opKind = iota // line deleted from input (-)
+	opInsert               // line inserted into output (+)
+	opEqual                // line present in input and output
+)
+
+func (kind opKind) String() string {
+	switch kind {
+	case opDelete:
+		return "delete"
+	case opInsert:
+		return "insert"
+	case opEqual:
+		return "equal"
+	default:
+		panic("unknown opKind")
+	}
+}
+
+type operation struct {
+	Kind    opKind
+	Content []string // content from b
+	I1, I2  int      // indices of the line in a
+	J1      int      // indices of the line in b, J2 implied by len(Content)
+}
+
+// operations returns the list of operations to convert a into b, consolidating
+// operations for multiple lines and not including equal lines.
+func operations(a, b []string) []*operation {
+	if len(a) == 0 && len(b) == 0 {
+		return nil
+	}
+
+	trace, offset := shortestEditSequence(a, b)
+	snakes := backtrack(trace, len(a), len(b), offset)
+
+	M, N := len(a), len(b)
+
+	var i int
+	solution := make([]*operation, len(a)+len(b))
+
+	add := func(op *operation, i2, j2 int) {
+		if op == nil {
+			return
+		}
+		op.I2 = i2
+		if op.Kind == opInsert {
+			op.Content = b[op.J1:j2]
+		}
+		solution[i] = op
+		i++
+	}
+	x, y := 0, 0
+	for _, snake := range snakes {
+		if len(snake) < 2 {
+			continue
+		}
+		var op *operation
+		// delete (horizontal)
+		for snake[0]-snake[1] > x-y {
+			if op == nil {
+				op = &operation{
+					Kind: opDelete,
+					I1:   x,
+					J1:   y,
+				}
+			}
+			x++
+			if x == M {
+				break
+			}
+		}
+		add(op, x, y)
+		op = nil
+		// insert (vertical)
+		for snake[0]-snake[1] < x-y {
+			if op == nil {
+				op = &operation{
+					Kind: opInsert,
+					I1:   x,
+					J1:   y,
+				}
+			}
+			y++
+		}
+		add(op, x, y)
+		op = nil
+		// equal (diagonal)
+		for x < snake[0] {
+			x++
+			y++
+		}
+		if x >= M && y >= N {
+			break
+		}
+	}
+	return solution[:i]
+}
+
+// backtrack uses the trace for the edit sequence computation and returns the
+// "snakes" that make up the solution. A "snake" is a single deletion or
+// insertion followed by zero or more diagonals.
+func backtrack(trace [][]int, x, y, offset int) [][]int {
+	snakes := make([][]int, len(trace))
+	d := len(trace) - 1
+	for ; x > 0 && y > 0 && d > 0; d-- {
+		V := trace[d]
+		if len(V) == 0 {
+			continue
+		}
+		snakes[d] = []int{x, y}
+
+		k := x - y
+
+		var kPrev int
+		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
+			kPrev = k + 1
+		} else {
+			kPrev = k - 1
+		}
+
+		x = V[kPrev+offset]
+		y = x - kPrev
+	}
+	if x < 0 || y < 0 {
+		return snakes
+	}
+	snakes[d] = []int{x, y}
+	return snakes
+}
+
+// shortestEditSequence returns the shortest edit sequence that converts a into b.
+func shortestEditSequence(a, b []string) ([][]int, int) {
+	M, N := len(a), len(b)
+	V := make([]int, 2*(N+M)+1)
+	offset := N + M
+	trace := make([][]int, N+M+1)
+
+	// Iterate through the maximum possible length of the SES (N+M).
+	for d := 0; d <= N+M; d++ {
+		copyV := make([]int, len(V))
+		// k lines are represented by the equation y = x - k. We move in
+		// increments of 2 because end points for even d are on even k lines.
+		for k := -d; k <= d; k += 2 {
+			// At each point, we either go down or to the right. We go down if
+			// k == -d, and we go to the right if k == d. We also prioritize
+			// the maximum x value, because we prefer deletions to insertions.
+			var x int
+			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
+				x = V[k+1+offset] // down
+			} else {
+				x = V[k-1+offset] + 1 // right
+			}
+
+			y := x - k
+
+			// Diagonal moves while we have equal contents.
+			for x < M && y < N && a[x] == b[y] {
+				x++
+				y++
+			}
+
+			V[k+offset] = x
+
+			// Return once we've reached the end of both a and b.
+			if x == M && y == N {
+				// Makes sure to save the state of the array before returning.
+				copy(copyV, V)
+				trace[d] = copyV
+				return trace, offset
+			}
+		}
+
+		// Save the state of the array.
+		copy(copyV, V)
+		trace[d] = copyV
+	}
+	return nil, 0
+}
+
+func splitLines(text string) []string {
+	lines := strings.SplitAfter(text, "\n")
+	if lines[len(lines)-1] == "" {
+		lines = lines[:len(lines)-1]
+	}
+	return lines
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/myers/diff_test.go b/pkg/plugin/processor/builtin/internal/diff/myers/diff_test.go
new file mode 100644
index 000000000..98fb250c9
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/myers/diff_test.go
@@ -0,0 +1,16 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package myers_test
+
+import (
+	"testing"
+
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/difftest"
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/myers"
+)
+
+func TestDiff(t *testing.T) {
+	difftest.DiffTest(t, myers.ComputeEdits)
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/ndiff.go b/pkg/plugin/processor/builtin/internal/diff/ndiff.go
new file mode 100644
index 000000000..65b3fdb2f
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/ndiff.go
@@ -0,0 +1,99 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+import (
+	"bytes"
+	"unicode/utf8"
+
+	"github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/diff/lcs"
+)
+
+// Strings computes the differences between two strings.
+// The resulting edits respect rune boundaries.
+func Strings(before, after string) []Edit {
+	if before == after {
+		return nil // common case
+	}
+
+	if isASCII(before) && isASCII(after) {
+		// TODO(adonovan): opt: specialize diffASCII for strings.
+		return diffASCII([]byte(before), []byte(after))
+	}
+	return diffRunes([]rune(before), []rune(after))
+}
+
+// Bytes computes the differences between two byte slices.
+// The resulting edits respect rune boundaries.
+func Bytes(before, after []byte) []Edit {
+	if bytes.Equal(before, after) {
+		return nil // common case
+	}
+
+	if isASCII(before) && isASCII(after) {
+		return diffASCII(before, after)
+	}
+	return diffRunes(runes(before), runes(after))
+}
+
+func diffASCII(before, after []byte) []Edit {
+	diffs := lcs.DiffBytes(before, after)
+
+	// Convert from LCS diffs.
+	res := make([]Edit, len(diffs))
+	for i, d := range diffs {
+		res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])}
+	}
+	return res
+}
+
+func diffRunes(before, after []rune) []Edit {
+	diffs := lcs.DiffRunes(before, after)
+
+	// The diffs returned by the lcs package use indexes
+	// into whatever slice was passed in.
+	// Convert rune offsets to byte offsets.
+	res := make([]Edit, len(diffs))
+	lastEnd := 0
+	utf8Len := 0
+	for i, d := range diffs {
+		utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits
+		start := utf8Len
+		utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit
+		res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])}
+		lastEnd = d.End
+	}
+	return res
+}
+
+// runes is like []rune(string(bytes)) without the duplicate allocation.
+func runes(bytes []byte) []rune {
+	n := utf8.RuneCount(bytes)
+	runes := make([]rune, n)
+	for i := 0; i < n; i++ {
+		r, sz := utf8.DecodeRune(bytes)
+		bytes = bytes[sz:]
+		runes[i] = r
+	}
+	return runes
+}
+
+// runesLen returns the length in bytes of the UTF-8 encoding of runes.
+func runesLen(runes []rune) (len int) {
+	for _, r := range runes {
+		len += utf8.RuneLen(r)
+	}
+	return len
+}
+
+// isASCII reports whether s contains only ASCII.
+func isASCII[S string | []byte](s S) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] >= utf8.RuneSelf {
+			return false
+		}
+	}
+	return true
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/testenv/testenv.go b/pkg/plugin/processor/builtin/internal/diff/testenv/testenv.go
new file mode 100644
index 000000000..d142e9356
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/testenv/testenv.go
@@ -0,0 +1,199 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testenv
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"runtime/debug"
+	"strings"
+	"sync"
+	"testing"
+)
+
+// packageMainIsDevel reports whether the module containing package main
+// is a development version (if module information is available).
+func packageMainIsDevel() bool {
+	info, ok := debug.ReadBuildInfo()
+	if !ok {
+		// Most test binaries currently lack build info, but this should become more
+		// permissive once https://golang.org/issue/33976 is fixed.
+		return true
+	}
+
+	// Note: info.Main.Version describes the version of the module containing
+	// package main, not the version of “the main module”.
+	// See https://golang.org/issue/33975.
+	return info.Main.Version == "(devel)"
+}
+
+var checkGoBuild struct {
+	once sync.Once
+	err  error
+}
+
+func hasTool(tool string) error {
+	if tool == "cgo" {
+		enabled, err := cgoEnabled(false)
+		if err != nil {
+			return fmt.Errorf("checking cgo: %v", err)
+		}
+		if !enabled {
+			return fmt.Errorf("cgo not enabled")
+		}
+		return nil
+	}
+
+	_, err := exec.LookPath(tool)
+	if err != nil {
+		return err
+	}
+
+	switch tool {
+	case "patch":
+		// check that the patch tool supports the -o argument
+		temp, err := os.CreateTemp("", "patch-test")
+		if err != nil {
+			return err
+		}
+		temp.Close()
+		defer os.Remove(temp.Name())
+		cmd := exec.Command(tool, "-o", temp.Name())
+		if err := cmd.Run(); err != nil {
+			return err
+		}
+
+	case "go":
+		checkGoBuild.once.Do(func() {
+			if runtime.GOROOT() != "" {
+				// Ensure that the 'go' command found by exec.LookPath is from the correct
+				// GOROOT. Otherwise, 'some/path/go test ./...' will test against some
+				// version of the 'go' binary other than 'some/path/go', which is almost
+				// certainly not what the user intended.
+				out, err := exec.Command(tool, "env", "GOROOT").CombinedOutput()
+				if err != nil {
+					checkGoBuild.err = err
+					return
+				}
+				GOROOT := strings.TrimSpace(string(out))
+				if GOROOT != runtime.GOROOT() {
+					checkGoBuild.err = fmt.Errorf("'go env GOROOT' does not match runtime.GOROOT:\n\tgo env: %s\n\tGOROOT: %s", GOROOT, runtime.GOROOT())
+					return
+				}
+			}
+
+			dir, err := os.MkdirTemp("", "testenv-*")
+			if err != nil {
+				checkGoBuild.err = err
+				return
+			}
+			defer os.RemoveAll(dir)
+
+			mainGo := filepath.Join(dir, "main.go")
+			if err := os.WriteFile(mainGo, []byte("package main\nfunc main() {}\n"), 0644); err != nil {
+				checkGoBuild.err = err
+				return
+			}
+			cmd := exec.Command("go", "build", "-o", os.DevNull, mainGo)
+			cmd.Dir = dir
+			if out, err := cmd.CombinedOutput(); err != nil {
+				if len(out) > 0 {
+					checkGoBuild.err = fmt.Errorf("%v: %v\n%s", cmd, err, out)
+				} else {
+					checkGoBuild.err = fmt.Errorf("%v: %v", cmd, err)
+				}
+			}
+		})
+		if checkGoBuild.err != nil {
+			return checkGoBuild.err
+		}
+
+	case "diff":
+		// Check that diff is the GNU version, needed for the -u argument and
+		// to report missing newlines at the end of files.
+		out, err := exec.Command(tool, "-version").Output()
+		if err != nil {
+			return err
+		}
+		if !bytes.Contains(out, []byte("GNU diffutils")) {
+			return fmt.Errorf("diff is not the GNU version")
+		}
+	}
+
+	return nil
+}
+
+func cgoEnabled(bypassEnvironment bool) (bool, error) {
+	cmd := exec.Command("go", "env", "CGO_ENABLED")
+	if bypassEnvironment {
+		cmd.Env = append(append([]string(nil), os.Environ()...), "CGO_ENABLED=")
+	}
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return false, err
+	}
+	enabled := strings.TrimSpace(string(out))
+	return enabled == "1", nil
+}
+
+func allowMissingTool(tool string) bool {
+	switch runtime.GOOS {
+	case "aix", "darwin", "dragonfly", "freebsd", "illumos", "linux", "netbsd", "openbsd", "plan9", "solaris", "windows":
+		// Known non-mobile OS. Expect a reasonably complete environment.
+	default:
+		return true
+	}
+
+	switch tool {
+	case "cgo":
+		if strings.HasSuffix(os.Getenv("GO_BUILDER_NAME"), "-nocgo") {
+			// Explicitly disabled on -nocgo builders.
+			return true
+		}
+		if enabled, err := cgoEnabled(true); err == nil && !enabled {
+			// No platform support.
+			return true
+		}
+	case "go":
+		if os.Getenv("GO_BUILDER_NAME") == "illumos-amd64-joyent" {
+			// Work around a misconfigured builder (see https://golang.org/issue/33950).
+			return true
+		}
+	case "diff":
+		if os.Getenv("GO_BUILDER_NAME") != "" {
+			return true
+		}
+	case "patch":
+		if os.Getenv("GO_BUILDER_NAME") != "" {
+			return true
+		}
+	}
+
+	// If a developer is actively working on this test, we expect them to have all
+	// of its dependencies installed. However, if it's just a dependency of some
+	// other module (for example, being run via 'go test all'), we should be more
+	// tolerant of unusual environments.
+	return !packageMainIsDevel()
+}
+
+// NeedsTool skips t if the named tool is not present in the path.
+// As a special case, "cgo" means "go" is present and can compile cgo programs.
+func NeedsTool(t testing.TB, tool string) {
+	err := hasTool(tool)
+	if err == nil {
+		return
+	}
+
+	t.Helper()
+	if allowMissingTool(tool) {
+		t.Skipf("skipping because %s tool not available: %v", tool, err)
+	} else {
+		t.Fatalf("%s tool not available: %v", tool, err)
+	}
+}
diff --git a/pkg/plugin/processor/builtin/internal/diff/unified.go b/pkg/plugin/processor/builtin/internal/diff/unified.go
new file mode 100644
index 000000000..cfbda6102
--- /dev/null
+++ b/pkg/plugin/processor/builtin/internal/diff/unified.go
@@ -0,0 +1,251 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+import (
+	"fmt"
+	"log"
+	"strings"
+)
+
+// DefaultContextLines is the number of unchanged lines of context that
+// Unified shows around each hunk. Use ToUnified to specify a different value.
+const DefaultContextLines = 3
+
+// Unified computes the edits that turn old into new and renders them as a
+// unified diff with DefaultContextLines lines of context, using oldLabel and
+// newLabel as the file names. If old and new are equal it returns "".
+func Unified(oldLabel, newLabel, old, new string) string {
+	text, err := ToUnified(oldLabel, newLabel, old, Strings(old, new), DefaultContextLines)
+	if err == nil {
+		return text
+	}
+	// Can't happen: edits are consistent.
+	log.Fatalf("internal error in diff.Unified: %v", err)
+	return text
+}
+
+// ToUnified renders edits, which apply to content, as a unified diff in which
+// each hunk is surrounded by up to contextLines lines of unchanged context.
+// oldLabel and newLabel name the content and result files. It returns an
+// error if the edits are inconsistent; see ApplyEdits.
+func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) {
+	u, err := toUnified(oldLabel, newLabel, content, edits, contextLines)
+	if err == nil {
+		return u.String(), nil
+	}
+	return "", err
+}
+
+// unified represents a set of edits as a unified diff; String renders it.
+type unified struct {
+	// from is the name of the original file, printed on the "--- " header line.
+	from string
+	// to is the name of the modified file, printed on the "+++ " header line.
+	to string
+	// hunks is the set of edit hunks needed to transform the file content.
+	hunks []*hunk
+}
+
+// hunk represents a contiguous set of line edits to apply.
+type hunk struct {
+	// fromLine is the one-based line of the original content where the hunk starts.
+	fromLine int
+	// toLine is the one-based line of the modified content where the hunk starts.
+	toLine int
+	// lines is the sequence of line operations in the hunk, context included.
+	lines []line
+}
+
+// line represents a single line operation to apply as part of a hunk.
+type line struct {
+	// kind is the type of line this represents: deletion, insertion or copy.
+	kind opKind
+	// content is the text of this line, including its trailing newline if any.
+	// For a deletion it is the line being removed; for all other kinds it is
+	// the line to put in the output.
+	content string
+}
+
+// opKind is used to denote the type of operation a line represents.
+type opKind int
+
+const (
+	// opDelete is the operation kind for a line that is present in the input
+	// but not in the output. As the first iota it is also opKind's zero value.
+	opDelete opKind = iota
+	// opInsert is the operation kind for a line that is new in the output.
+	opInsert
+	// opEqual is the operation kind for a line that is the same in the input and
+	// output, often used to provide context around edited lines.
+	opEqual
+)
+
+// String returns a human readable name for k: "delete", "insert" or "equal".
+// The result is not intended for machine processing. It panics if k is not
+// one of the declared operation kinds.
+func (k opKind) String() string {
+	names := [...]string{
+		opDelete: "delete",
+		opInsert: "insert",
+		opEqual:  "equal",
+	}
+	if k < 0 || int(k) >= len(names) {
+		panic("unknown operation kind")
+	}
+	return names[k]
+}
+
+// toUnified expands edits (which apply to content) to whole lines and groups
+// them into hunks with up to contextLines lines of unchanged context on each
+// side. It returns the error from lineEdits if the edits are rejected.
+func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) {
+	gap := contextLines * 2 // longest unchanged run that still joins two edits in one hunk
+	u := unified{
+		from: fromName,
+		to:   toName,
+	}
+	if len(edits) == 0 {
+		return u, nil
+	}
+	edits, err := lineEdits(content, edits) // expand to whole lines
+	if err != nil {
+		return u, err
+	}
+	lines := splitLines(content)
+	var h *hunk
+	last := 0   // zero-based line of the old text just past the previous edit
+	toLine := 0 // zero-based line of the new text that corresponds to last
+	for _, edit := range edits {
+		// Compute the zero-based line numbers of the edit start and end.
+		// TODO(adonovan): opt: compute incrementally, avoid O(n^2).
+		start := strings.Count(content[:edit.Start], "\n")
+		end := strings.Count(content[:edit.End], "\n")
+		if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
+			end++ // EOF counts as an implicit newline
+		}
+
+		// The unchanged lines since the previous edit also exist in the new
+		// text, whether they become joiners in this hunk or fall between hunks.
+		toLine += start - last
+		switch {
+		case h != nil && start == last:
+			// direct extension: the edit begins where the previous one ended
+		case h != nil && start <= last+gap:
+			// within range of previous lines, add the joiners
+			addEqualLines(h, lines, last, start)
+		default:
+			// need to start a new hunk
+			if h != nil {
+				// add the edge to the previous hunk
+				addEqualLines(h, lines, last, last+contextLines)
+				u.hunks = append(u.hunks, h)
+			}
+			h = &hunk{
+				fromLine: start + 1,
+				toLine:   toLine + 1,
+			}
+			// add the edge to the new hunk; it starts delta lines earlier
+			delta := addEqualLines(h, lines, start-contextLines, start)
+			h.fromLine -= delta
+			h.toLine -= delta
+		}
+		last = start
+		for i := start; i < end; i++ {
+			h.lines = append(h.lines, line{kind: opDelete, content: lines[i]})
+			last++
+		}
+		for _, text := range splitLines(edit.New) { // no iterations when edit.New is ""
+			h.lines = append(h.lines, line{kind: opInsert, content: text})
+			toLine++
+		}
+	}
+	if h != nil {
+		// add the edge to the final hunk
+		addEqualLines(h, lines, last, last+contextLines)
+		u.hunks = append(u.hunks, h)
+	}
+	return u, nil
+}
+
+// splitLines splits text after each "\n", keeping the line terminators.
+func splitLines(text string) []string {
+	parts := strings.SplitAfter(text, "\n")
+	if n := len(parts) - 1; parts[n] == "" {
+		return parts[:n] // drop the empty piece left for "" or after a final "\n"
+	}
+	return parts
+}
+
+// addEqualLines appends lines[start:end] to h as opEqual (context) lines.
+// Indices below zero are skipped and copying stops at the end of lines, so
+// callers may pass a range that overhangs either end of the file. It returns
+// the number of lines appended.
+func addEqualLines(h *hunk, lines []string, start, end int) int {
+	if start < 0 {
+		start = 0
+	}
+	before := len(h.lines)
+	for i := start; i < end && i < len(lines); i++ {
+		h.lines = append(h.lines, line{kind: opEqual, content: lines[i]})
+	}
+	return len(h.lines) - before
+}
+
+// String converts a unified diff to the standard textual form for that diff.
+// The output of this function can be passed to tools like patch.
+func (u unified) String() string {
+	if len(u.hunks) == 0 {
+		return ""
+	}
+	b := new(strings.Builder)
+	fmt.Fprintf(b, "--- %s\n", u.from)
+	fmt.Fprintf(b, "+++ %s\n", u.to)
+	for _, hunk := range u.hunks {
+		fromCount, toCount := 0, 0
+		for _, l := range hunk.lines {
+			switch l.kind {
+			case opDelete:
+				fromCount++
+			case opInsert:
+				toCount++
+			default:
+				fromCount++
+				toCount++
+			}
+		}
+		fmt.Fprint(b, "@@")
+		if fromCount > 1 {
+			fmt.Fprintf(b, " -%d,%d", hunk.fromLine, fromCount)
+		} else if hunk.fromLine == 1 && fromCount == 0 {
+			// Match odd GNU diff -u behavior adding to empty file.
+			fmt.Fprintf(b, " -0,0")
+		} else {
+			fmt.Fprintf(b, " -%d", hunk.fromLine)
+		}
+		if toCount > 1 {
+			fmt.Fprintf(b, " +%d,%d", hunk.toLine, toCount)
+		} else if hunk.toLine == 1 && toCount == 0 {
+			// Match odd GNU diff -u behavior adding to empty file.
+			fmt.Fprintf(b, " +0,0")
+		} else {
+			fmt.Fprintf(b, " +%d", hunk.toLine)
+		}
+		fmt.Fprint(b, " @@\n")
+		for _, l := range hunk.lines {
+			switch l.kind {
+			case opDelete:
+				fmt.Fprintf(b, "-%s", l.content)
+			case opInsert:
+				fmt.Fprintf(b, "+%s", l.content)
+			default:
+				fmt.Fprintf(b, " %s", l.content)
+			}
+			if !strings.HasSuffix(l.content, "\n") {
+				fmt.Fprintf(b, "\n\\ No newline at end of file\n")
+			}
+		}
+	}
+	return b.String()
+}