Skip to content

Merge of upstream patch #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@ This is a partial fork of [diff-match-patch][],
with extra bits to make this a modern, friendly
member of the Python packaging ecosystem. The
library will be periodically updated with changes
from the upstream project. If you would like to
contribute fixes or improvements to the library
itself, and not the packaging code, please submit
them to the upstream library directly.
from the upstream project.

Since August 2024, Google's diff-match-patch
library is archived, but there is a
[maintained fork][diff-match-patch-maintained].

If you would like to contribute fixes or
improvements to the library itself, and not the
packaging code, please submit them directly to the
maintained upstream fork.

[diff-match-patch]: https://github.com/google/diff-match-patch
[diff-match-patch-maintained]: https://github.com/dmsnell/diff-match-patch
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

Google's [Diff Match and Patch][DMP] library, packaged for modern Python.

Since August 2024, Google's diff-match-patch library is archived, and
this project will now track the [maintained fork][diff-match-patch-maintained].

[![version](https://img.shields.io/pypi/v/diff-match-patch.svg)](https://pypi.org/project/diff-match-patch)
[![changelog](https://img.shields.io/badge/change-log-blue)](https://github.com/diff-match-patch-python/diff-match-patch/blob/main/CHANGELOG.md)
[![license](https://img.shields.io/pypi/l/diff-match-patch.svg)](https://github.com/diff-match-patch-python/diff-match-patch/blob/master/LICENSE)
Expand Down Expand Up @@ -82,3 +85,4 @@ This library also implements a [Bitap matching algorithm](https://neil.fraser.na

[DMP]: https://github.com/google/diff-match-patch
[API]: https://github.com/google/diff-match-patch/wiki/API
[diff-match-patch-maintained]: https://github.com/dmsnell/diff-match-patch
17 changes: 10 additions & 7 deletions diff_match_patch/diff_match_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,8 @@ def diff_prettyHtml(self, diffs):
"""
html = []
for op, data in diffs:
if 0 == len(data):
continue
text = (
data.replace("&", "&")
.replace("<", "&lt;")
Expand Down Expand Up @@ -1225,9 +1227,9 @@ def diff_toDelta(self, diffs):
data = data.encode("utf-8")
text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))
elif op == self.DIFF_DELETE:
text.append("-%d" % len(data))
text.append("-%d" % (len(data.encode("utf-16-be")) // 2))
elif op == self.DIFF_EQUAL:
text.append("=%d" % len(data))
text.append("=%d" % (len(data.encode("utf-16-be")) // 2))
return "\t".join(text)

def diff_fromDelta(self, text1, delta):
Expand All @@ -1245,7 +1247,8 @@ def diff_fromDelta(self, text1, delta):
ValueError: If invalid input.
"""
diffs = []
pointer = 0 # Cursor in text1
as_utf16 = text1.encode("utf-16-be")
pointer = 0 # Cursor in as_utf16
tokens = delta.split("\t")
for token in tokens:
if token == "":
Expand All @@ -1264,8 +1267,8 @@ def diff_fromDelta(self, text1, delta):
raise ValueError("Invalid number in diff_fromDelta: " + param)
if n < 0:
raise ValueError("Negative number in diff_fromDelta: " + param)
text = text1[pointer : pointer + n]
pointer += n
text = as_utf16[pointer : pointer + n * 2].decode("utf-16-be")
pointer += n * 2
if token[0] == "=":
diffs.append((self.DIFF_EQUAL, text))
else:
Expand All @@ -1275,10 +1278,10 @@ def diff_fromDelta(self, text1, delta):
raise ValueError(
"Invalid diff operation in diff_fromDelta: " + token[0]
)
if pointer != len(text1):
if pointer != len(as_utf16):
raise ValueError(
"Delta length (%d) does not equal source text length (%d)."
% (pointer, len(text1))
% (pointer, len(as_utf16))
)
return diffs

Expand Down
80 changes: 80 additions & 0 deletions diff_match_patch/tests/diff_match_patch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,16 @@ def testDiffDelta(self):
# Convert delta string into a diff.
self.assertEqual(diffs, self.dmp.diff_fromDelta(text1, delta))

diffs = self.dmp.diff_main(
"\U0001F64B\U0001F64B", "\U0001F64B\U0001F64C\U0001F64B"
)
delta = self.dmp.diff_toDelta(diffs)
self.assertEqual("=2\t+%F0%9F%99%8C\t=2", delta)
self.assertEqual(
diffs,
self.dmp.diff_fromDelta("\U0001F64B\U0001F64B", "=2\t+%F0%9F%99%8C\t=2"),
)

# Verify pool of unchanged characters.
diffs = [
(
Expand All @@ -849,6 +859,76 @@ def testDiffDelta(self):
# Convert delta string into a diff.
self.assertEqual(diffs, self.dmp.diff_fromDelta("", delta))

# Unicode: split surrogates
self.assertEqual(
self.dmp.diff_toDelta(
[
(self.dmp.DIFF_INSERT, "\U0001F171"),
(self.dmp.DIFF_EQUAL, "\U0001F170\U0001F171"),
]
),
self.dmp.diff_toDelta(
self.dmp.diff_main(
"\U0001F170\U0001F171", "\U0001F171\U0001F170\U0001F171"
)
),
"Inserting similar surrogate pair at beginning",
)
self.assertEqual(
self.dmp.diff_toDelta(
[
(self.dmp.DIFF_EQUAL, "\U0001F170"),
(self.dmp.DIFF_INSERT, "\U0001F172"),
(self.dmp.DIFF_EQUAL, "\U0001F171"),
]
),
self.dmp.diff_toDelta(
self.dmp.diff_main(
"\U0001F170\U0001F171", "\U0001F170\U0001F172\U0001F171"
)
),
"Inserting similar surrogate pair in the middle",
)
self.assertEqual(
self.dmp.diff_toDelta(
[
(self.dmp.DIFF_DELETE, "\U0001F171"),
(self.dmp.DIFF_EQUAL, "\U0001F170\U0001F171"),
]
),
self.dmp.diff_toDelta(
self.dmp.diff_main(
"\U0001F171\U0001F170\U0001F171", "\U0001F170\U0001F171"
)
),
"Deleting similar surogate pair at the beginning",
)
self.assertEqual(
self.dmp.diff_toDelta(
[
(self.dmp.DIFF_EQUAL, "\U0001F170"),
(self.dmp.DIFF_DELETE, "\U0001F172"),
(self.dmp.DIFF_EQUAL, "\U0001F171"),
]
),
self.dmp.diff_toDelta(
self.dmp.diff_main(
"\U0001F170\U0001F172\U0001F171", "\U0001F170\U0001F171"
)
),
"Deleting similar surogate pair in the middle",
)
self.assertEqual(
self.dmp.diff_toDelta(
[
(self.dmp.DIFF_DELETE, "\U0001F170"),
(self.dmp.DIFF_INSERT, "\U0001F171"),
]
),
self.dmp.diff_toDelta(self.dmp.diff_main("\U0001F170", "\U0001F171")),
"Swap surrogate pair",
)

# 160 kb string.
a = "abcdefghij"
for i in range(14):
Expand Down
Loading