Skip to content

Commit ce721e0

Browse files
authored
Merge pull request #17 from matthewhegarty/upstream-patch
Merge of upstream patch
2 parents 0083470 + 325766c commit ce721e0

File tree

4 files changed

+105
-11
lines changed

4 files changed

+105
-11
lines changed

CONTRIBUTING.md

+11-4
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,16 @@ This is a partial fork of [diff-match-patch][],
1616
with extra bits to make this a modern, friendly
1717
member of the Python packaging ecosystem. The
1818
library will be periodically updated with changes
19-
from the upstream project. If you would like to
20-
contribute fixes or improvements to the library
21-
itself, and not the packaging code, please submit
22-
them to the upstream library directly.
19+
from the upstream project.
20+
21+
Since August 2024, Google's diff-match-patch
22+
library is archived, but there is a
23+
[maintained fork][diff-match-patch-maintained].
24+
25+
If you would like to contribute fixes or
26+
improvements to the library itself, and not the
27+
packaging code, please submit them to the upstream
28+
maintained fork library directly.
2329

2430
[diff-match-patch]: https://github.com/google/diff-match-patch
31+
[diff-match-patch-maintained]: https://github.com/dmsnell/diff-match-patch

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
Google's [Diff Match and Patch][DMP] library, packaged for modern Python.
44

5+
Since August 2024, Google's diff-match-patch library is archived, and
6+
this project will now track the [maintained fork][diff-match-patch-maintained].
7+
58
[![version](https://img.shields.io/pypi/v/diff-match-patch.svg)](https://pypi.org/project/diff-match-patch)
69
[![changelog](https://img.shields.io/badge/change-log-blue)](https://github.com/diff-match-patch-python/diff-match-patch/blob/main/CHANGELOG.md)
710
[![license](https://img.shields.io/pypi/l/diff-match-patch.svg)](https://github.com/diff-match-patch-python/diff-match-patch/blob/master/LICENSE)
@@ -82,3 +85,4 @@ This library also implements a [Bitap matching algorithm](https://neil.fraser.na
8285

8386
[DMP]: https://github.com/google/diff-match-patch
8487
[API]: https://github.com/google/diff-match-patch/wiki/API
88+
[diff-match-patch-maintained]: https://github.com/dmsnell/diff-match-patch

diff_match_patch/diff_match_patch.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,8 @@ def diff_prettyHtml(self, diffs):
11361136
"""
11371137
html = []
11381138
for op, data in diffs:
1139+
if 0 == len(data):
1140+
continue
11391141
text = (
11401142
data.replace("&", "&amp;")
11411143
.replace("<", "&lt;")
@@ -1225,9 +1227,9 @@ def diff_toDelta(self, diffs):
12251227
data = data.encode("utf-8")
12261228
text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))
12271229
elif op == self.DIFF_DELETE:
1228-
text.append("-%d" % len(data))
1230+
text.append("-%d" % (len(data.encode("utf-16-be")) // 2))
12291231
elif op == self.DIFF_EQUAL:
1230-
text.append("=%d" % len(data))
1232+
text.append("=%d" % (len(data.encode("utf-16-be")) // 2))
12311233
return "\t".join(text)
12321234

12331235
def diff_fromDelta(self, text1, delta):
@@ -1245,7 +1247,8 @@ def diff_fromDelta(self, text1, delta):
12451247
ValueError: If invalid input.
12461248
"""
12471249
diffs = []
1248-
pointer = 0 # Cursor in text1
1250+
as_utf16 = text1.encode("utf-16-be")
1251+
pointer = 0 # Cursor in as_utf16
12491252
tokens = delta.split("\t")
12501253
for token in tokens:
12511254
if token == "":
@@ -1264,8 +1267,8 @@ def diff_fromDelta(self, text1, delta):
12641267
raise ValueError("Invalid number in diff_fromDelta: " + param)
12651268
if n < 0:
12661269
raise ValueError("Negative number in diff_fromDelta: " + param)
1267-
text = text1[pointer : pointer + n]
1268-
pointer += n
1270+
text = as_utf16[pointer : pointer + n * 2].decode("utf-16-be")
1271+
pointer += n * 2
12691272
if token[0] == "=":
12701273
diffs.append((self.DIFF_EQUAL, text))
12711274
else:
@@ -1275,10 +1278,10 @@ def diff_fromDelta(self, text1, delta):
12751278
raise ValueError(
12761279
"Invalid diff operation in diff_fromDelta: " + token[0]
12771280
)
1278-
if pointer != len(text1):
1281+
if pointer != len(as_utf16):
12791282
raise ValueError(
12801283
"Delta length (%d) does not equal source text length (%d)."
1281-
% (pointer, len(text1))
1284+
% (pointer, len(as_utf16))
12821285
)
12831286
return diffs
12841287

diff_match_patch/tests/diff_match_patch_test.py

+80
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,16 @@ def testDiffDelta(self):
833833
# Convert delta string into a diff.
834834
self.assertEqual(diffs, self.dmp.diff_fromDelta(text1, delta))
835835

836+
diffs = self.dmp.diff_main(
837+
"\U0001F64B\U0001F64B", "\U0001F64B\U0001F64C\U0001F64B"
838+
)
839+
delta = self.dmp.diff_toDelta(diffs)
840+
self.assertEqual("=2\t+%F0%9F%99%8C\t=2", delta)
841+
self.assertEqual(
842+
diffs,
843+
self.dmp.diff_fromDelta("\U0001F64B\U0001F64B", "=2\t+%F0%9F%99%8C\t=2"),
844+
)
845+
836846
# Verify pool of unchanged characters.
837847
diffs = [
838848
(
@@ -849,6 +859,76 @@ def testDiffDelta(self):
849859
# Convert delta string into a diff.
850860
self.assertEqual(diffs, self.dmp.diff_fromDelta("", delta))
851861

862+
# Unicode: split surrogates
863+
self.assertEqual(
864+
self.dmp.diff_toDelta(
865+
[
866+
(self.dmp.DIFF_INSERT, "\U0001F171"),
867+
(self.dmp.DIFF_EQUAL, "\U0001F170\U0001F171"),
868+
]
869+
),
870+
self.dmp.diff_toDelta(
871+
self.dmp.diff_main(
872+
"\U0001F170\U0001F171", "\U0001F171\U0001F170\U0001F171"
873+
)
874+
),
875+
"Inserting similar surrogate pair at beginning",
876+
)
877+
self.assertEqual(
878+
self.dmp.diff_toDelta(
879+
[
880+
(self.dmp.DIFF_EQUAL, "\U0001F170"),
881+
(self.dmp.DIFF_INSERT, "\U0001F172"),
882+
(self.dmp.DIFF_EQUAL, "\U0001F171"),
883+
]
884+
),
885+
self.dmp.diff_toDelta(
886+
self.dmp.diff_main(
887+
"\U0001F170\U0001F171", "\U0001F170\U0001F172\U0001F171"
888+
)
889+
),
890+
"Inserting similar surrogate pair in the middle",
891+
)
892+
self.assertEqual(
893+
self.dmp.diff_toDelta(
894+
[
895+
(self.dmp.DIFF_DELETE, "\U0001F171"),
896+
(self.dmp.DIFF_EQUAL, "\U0001F170\U0001F171"),
897+
]
898+
),
899+
self.dmp.diff_toDelta(
900+
self.dmp.diff_main(
901+
"\U0001F171\U0001F170\U0001F171", "\U0001F170\U0001F171"
902+
)
903+
),
904+
"Deleting similar surogate pair at the beginning",
905+
)
906+
self.assertEqual(
907+
self.dmp.diff_toDelta(
908+
[
909+
(self.dmp.DIFF_EQUAL, "\U0001F170"),
910+
(self.dmp.DIFF_DELETE, "\U0001F172"),
911+
(self.dmp.DIFF_EQUAL, "\U0001F171"),
912+
]
913+
),
914+
self.dmp.diff_toDelta(
915+
self.dmp.diff_main(
916+
"\U0001F170\U0001F172\U0001F171", "\U0001F170\U0001F171"
917+
)
918+
),
919+
"Deleting similar surogate pair in the middle",
920+
)
921+
self.assertEqual(
922+
self.dmp.diff_toDelta(
923+
[
924+
(self.dmp.DIFF_DELETE, "\U0001F170"),
925+
(self.dmp.DIFF_INSERT, "\U0001F171"),
926+
]
927+
),
928+
self.dmp.diff_toDelta(self.dmp.diff_main("\U0001F170", "\U0001F171")),
929+
"Swap surrogate pair",
930+
)
931+
852932
# 160 kb string.
853933
a = "abcdefghij"
854934
for i in range(14):

0 commit comments

Comments
 (0)