diff --git a/java/src/name/fraser/neil/plaintext/diff_match_patch.java b/java/src/name/fraser/neil/plaintext/diff_match_patch.java index 9d07867d..fe5b55da 100644 --- a/java/src/name/fraser/neil/plaintext/diff_match_patch.java +++ b/java/src/name/fraser/neil/plaintext/diff_match_patch.java @@ -19,6 +19,7 @@ package name.fraser.neil.plaintext; import java.io.UnsupportedEncodingException; +import java.lang.Character; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.*; @@ -1429,7 +1430,31 @@ public int diff_levenshtein(List<Diff> diffs) { */ public String diff_toDelta(List<Diff> diffs) { StringBuilder text = new StringBuilder(); + char lastEnd = 0; + boolean isFirst = true; for (Diff aDiff : diffs) { + if (aDiff.text.isEmpty()) { + continue; + } + + char thisTop = aDiff.text.charAt(0); + char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1); + + if (Character.isHighSurrogate(thisEnd)) { + lastEnd = thisEnd; + aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1); + } + + if (! 
isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) { + aDiff.text = lastEnd + aDiff.text; + } + + isFirst = false; + + if ( aDiff.text.isEmpty() ) { + continue; + } + switch (aDiff.operation) { case INSERT: try { @@ -1457,6 +1482,103 @@ public String diff_toDelta(List<Diff> diffs) { return delta; } + private int digit16(char b) throws IllegalArgumentException { + switch (b) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; + default: + throw new IllegalArgumentException(); + } + } + + private String decodeURI(String text) throws IllegalArgumentException { + int i = 0; + StringBuilder decoded = new StringBuilder(text.length()); + + while (i < text.length()) { + if (text.charAt(i) != '%') { + decoded.append(text.charAt(i++)); + continue; + } + + // start a percent-sequence + int byte1 = (digit16(text.charAt(i + 1)) << 4) + digit16(text.charAt(i + 2)); + if ((byte1 & 0x80) == 0) { + decoded.append(Character.toChars(byte1)); + i += 3; + continue; + } + + if ( text.charAt(i + 3) != '%') { + throw new IllegalArgumentException(); + } + + int byte2 = (digit16(text.charAt(i + 4)) << 4) + digit16(text.charAt(i + 5)); + if ((byte2 & 0xC0) != 0x80) { + throw new IllegalArgumentException(); + } + byte2 = byte2 & 0x3F; + if ((byte1 & 0xE0) == 0xC0) { + decoded.append(Character.toChars(((byte1 & 0x1F) << 6) | byte2)); + i += 6; + continue; + } + + if (text.charAt(i + 6) != '%') { + throw new IllegalArgumentException(); + } + + int byte3 = (digit16(text.charAt(i + 7)) << 4) + digit16(text.charAt(i + 8)); + if ((byte3 & 0xC0) != 0x80) { + throw new IllegalArgumentException(); + } + 
byte3 = byte3 & 0x3F; + if ((byte1 & 0xF0) == 0xE0) { + // unpaired surrogate are fine here + decoded.append(Character.toChars(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3)); + i += 9; + continue; + } + + if (text.charAt(i + 9) != '%') { + throw new IllegalArgumentException(); + } + + int byte4 = (digit16(text.charAt(i + 10)) << 4) + digit16(text.charAt(i + 11)); + if ((byte4 & 0xC0) != 0x80) { + throw new IllegalArgumentException(); + } + byte4 = byte4 & 0x3F; + if ((byte1 & 0xF8) == 0xF0) { + int codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4; + if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) { + decoded.append(Character.toChars((codePoint & 0xFFFF) >>> 10 & 0x3FF | 0xD800)); + decoded.append(Character.toChars(0xDC00 | (codePoint & 0xFFFF) & 0x3FF)); + i += 12; + continue; + } + } + + throw new IllegalArgumentException(); + } + + return decoded.toString(); + } + /** * Given the original text1, and an encoded string which describes the * operations required to transform text1 into text2, compute the full diff. @@ -1483,10 +1605,7 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta) // decode would change all "+" to " " param = param.replace("+", "%2B"); try { - param = URLDecoder.decode(param, "UTF-8"); - } catch (UnsupportedEncodingException e) { - // Not likely on modern system. - throw new Error("This system does not support UTF-8.", e); + param = this.decodeURI(param); } catch (IllegalArgumentException e) { // Malformed URI sequence. throw new IllegalArgumentException( @@ -2269,10 +2388,7 @@ public List<Patch> patch_fromText(String textline) line = text.getFirst().substring(1); line = line.replace("+", "%2B"); // decode would change all "+" to " " try { - line = URLDecoder.decode(line, "UTF-8"); - } catch (UnsupportedEncodingException e) { - // Not likely on modern system. 
- throw new Error("This system does not support UTF-8.", e); + line = this.decodeURI(line); } catch (IllegalArgumentException e) { // Malformed URI sequence. throw new IllegalArgumentException( diff --git a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java index 2f387933..5be10f13 100644 --- a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java +++ b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java @@ -424,6 +424,42 @@ public static void testDiffDelta() { assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + diffs = diffList(new Diff(EQUAL, "\ud83d\ude4b\ud83d"), new Diff(INSERT, "\ude4c\ud83d"), new Diff(EQUAL, "\ude4b")); + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Surrogate Pairs.", "=2\t+%F0%9F%99%8C\t=2", delta); + + assertEquals( + "diff_toDelta: insert surrogate pair between similar high surrogates", + dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c\udd70"), new Diff(INSERT, "\ud83c\udd70"), new Diff(EQUAL, "\ud83c\udd71"))), + dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c\udd70\ud83c"), new Diff(INSERT, "\udd70\ud83c"), new Diff(EQUAL, "\udd71"))) + ); + + assertEquals( + "diff_toDelta: swap surrogate pairs delete/insert", + dmp.diff_toDelta(diffList(new Diff(DELETE, "\ud83c\udd70"), new Diff(INSERT, "\ud83c\udd71"))), + dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c"), new Diff(DELETE, "\udd70"), new Diff(INSERT, "\udd71"))) + ); + + assertEquals( + "diff_toDelta: swap surrogate pairs insert/delete", + dmp.diff_toDelta(diffList(new Diff(INSERT, "\ud83c\udd70"), new Diff(DELETE, "\ud83c\udd71"))), + dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c"), new Diff(INSERT, "\udd70"), new Diff(DELETE, "\udd71"))) + ); + + assertEquals( + "diff_toDelta: empty diff groups", + dmp.diff_toDelta(diffList(new Diff(EQUAL, "abcdef"), new Diff(DELETE, ""), new Diff(INSERT, "ghijk"))), + 
dmp.diff_toDelta(diffList(new Diff(EQUAL, "abcdef"), new Diff(INSERT, "ghijk"))) + ); + + // Different versions of the library may have created deltas with + // half of a surrogate pair encoded as if it were valid UTF-8 + assertEquals( + "diff_toDelta: surrogate half encoded as UTF8", + dmp.diff_toDelta(dmp.diff_fromDelta("\ud83c\udd70", "-2\t+%F0%9F%85%B1")), + dmp.diff_toDelta(dmp.diff_fromDelta("\ud83c\udd70", "=1\t-1\t+%ED%B5%B1")) + ); + // Verify pool of unchanged characters. diffs = diffList(new Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); String text2 = dmp.diff_text2(diffs); diff --git a/javascript/diff_match_patch.js b/javascript/diff_match_patch.js index 2fe320a1..8f33865a 100644 --- a/javascript/diff_match_patch.js +++ b/javascript/diff_match_patch.js @@ -1,55 +1,59 @@ var diff_match_patch=function(){this.Diff_Timeout=1;this.Diff_EditCost=4;this.Match_Threshold=.5;this.Match_Distance=1E3;this.Patch_DeleteThreshold=.5;this.Patch_Margin=4;this.Match_MaxBits=32},DIFF_DELETE=-1,DIFF_INSERT=1,DIFF_EQUAL=0;diff_match_patch.Diff=function(a,b){this[0]=a;this[1]=b};diff_match_patch.Diff.prototype.length=2;diff_match_patch.Diff.prototype.toString=function(){return this[0]+","+this[1]}; -diff_match_patch.prototype.diff_main=function(a,b,c,d){"undefined"==typeof d&&(d=0>=this.Diff_Timeout?Number.MAX_VALUE:(new Date).getTime()+1E3*this.Diff_Timeout);if(null==a||null==b)throw Error("Null input. 
(diff_main)");if(a==b)return a?[new diff_match_patch.Diff(DIFF_EQUAL,a)]:[];"undefined"==typeof c&&(c=!0);var e=c,f=this.diff_commonPrefix(a,b);c=a.substring(0,f);a=a.substring(f);b=b.substring(f);f=this.diff_commonSuffix(a,b);var g=a.substring(a.length-f);a=a.substring(0,a.length-f);b=b.substring(0, -b.length-f);a=this.diff_compute_(a,b,e,d);c&&a.unshift(new diff_match_patch.Diff(DIFF_EQUAL,c));g&&a.push(new diff_match_patch.Diff(DIFF_EQUAL,g));this.diff_cleanupMerge(a);return a}; -diff_match_patch.prototype.diff_compute_=function(a,b,c,d){if(!a)return[new diff_match_patch.Diff(DIFF_INSERT,b)];if(!b)return[new diff_match_patch.Diff(DIFF_DELETE,a)];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);return-1!=g?(c=[new diff_match_patch.Diff(DIFF_INSERT,e.substring(0,g)),new diff_match_patch.Diff(DIFF_EQUAL,f),new diff_match_patch.Diff(DIFF_INSERT,e.substring(g+f.length))],a.length>b.length&&(c[0][0]=c[2][0]=DIFF_DELETE),c):1==f.length?[new diff_match_patch.Diff(DIFF_DELETE, -a),new diff_match_patch.Diff(DIFF_INSERT,b)]:(e=this.diff_halfMatch_(a,b))?(b=e[1],f=e[3],a=e[4],e=this.diff_main(e[0],e[2],c,d),c=this.diff_main(b,f,c,d),e.concat([new diff_match_patch.Diff(DIFF_EQUAL,a)],c)):c&&100<a.length&&100<b.length?this.diff_lineMode_(a,b,d):this.diff_bisect_(a,b,d)}; -diff_match_patch.prototype.diff_lineMode_=function(a,b,c){var d=this.diff_linesToChars_(a,b);a=d.chars1;b=d.chars2;d=d.lineArray;a=this.diff_main(a,b,!1,c);this.diff_charsToLines_(a,d);this.diff_cleanupSemantic(a);a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var e=d=b=0,f="",g="";b<a.length;){switch(a[b][0]){case DIFF_INSERT:e++;g+=a[b][1];break;case DIFF_DELETE:d++;f+=a[b][1];break;case DIFF_EQUAL:if(1<=d&&1<=e){a.splice(b-d-e,d+e);b=b-d-e;d=this.diff_main(f,g,!1,c);for(e=d.length-1;0<=e;e--)a.splice(b, -0,d[e]);b+=d.length}d=e=0;g=f=""}b++}a.pop();return a}; -diff_match_patch.prototype.diff_bisect_=function(a,b,c){for(var 
d=a.length,e=b.length,f=Math.ceil((d+e)/2),g=2*f,h=Array(g),l=Array(g),k=0;k<g;k++)h[k]=-1,l[k]=-1;h[f+1]=0;l[f+1]=0;k=d-e;for(var m=0!=k%2,p=0,x=0,w=0,q=0,t=0;t<f&&!((new Date).getTime()>c);t++){for(var v=-t+p;v<=t-x;v+=2){var n=f+v;var r=v==-t||v!=t&&h[n-1]<h[n+1]?h[n+1]:h[n-1]+1;for(var y=r-v;r<d&&y<e&&a.charAt(r)==b.charAt(y);)r++,y++;h[n]=r;if(r>d)x+=2;else if(y>e)p+=2;else if(m&&(n=f+k-v,0<=n&&n<g&&-1!=l[n])){var u=d-l[n];if(r>= -u)return this.diff_bisectSplit_(a,b,r,y,c)}}for(v=-t+w;v<=t-q;v+=2){n=f+v;u=v==-t||v!=t&&l[n-1]<l[n+1]?l[n+1]:l[n-1]+1;for(r=u-v;u<d&&r<e&&a.charAt(d-u-1)==b.charAt(e-r-1);)u++,r++;l[n]=u;if(u>d)q+=2;else if(r>e)w+=2;else if(!m&&(n=f+k-v,0<=n&&n<g&&-1!=h[n]&&(r=h[n],y=f+r-n,u=d-u,r>=u)))return this.diff_bisectSplit_(a,b,r,y,c)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]}; -diff_match_patch.prototype.diff_bisectSplit_=function(a,b,c,d,e){var f=a.substring(0,c),g=b.substring(0,d);a=a.substring(c);b=b.substring(d);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)}; -diff_match_patch.prototype.diff_linesToChars_=function(a,b){function c(a){for(var b="",c=0,g=-1,h=d.length;g<a.length-1;){g=a.indexOf("\n",c);-1==g&&(g=a.length-1);var l=a.substring(c,g+1);(e.hasOwnProperty?e.hasOwnProperty(l):void 0!==e[l])?b+=String.fromCharCode(e[l]):(h==f&&(l=a.substring(c),g=a.length),b+=String.fromCharCode(h),e[l]=h,d[h++]=l);c=g+1}return b}var d=[],e={};d[0]="";var f=4E4,g=c(a);f=65535;var h=c(b);return{chars1:g,chars2:h,lineArray:d}}; -diff_match_patch.prototype.diff_charsToLines_=function(a,b){for(var c=0;c<a.length;c++){for(var d=a[c][1],e=[],f=0;f<d.length;f++)e[f]=b[d.charCodeAt(f)];a[c][1]=e.join("")}};diff_match_patch.prototype.diff_commonPrefix=function(a,b){if(!a||!b||a.charAt(0)!=b.charAt(0))return 0;for(var c=0,d=Math.min(a.length,b.length),e=d,f=0;c<e;)a.substring(f,e)==b.substring(f,e)?f=c=e:d=e,e=Math.floor((d-c)/2+c);return e}; 
-diff_match_patch.prototype.diff_commonSuffix=function(a,b){if(!a||!b||a.charAt(a.length-1)!=b.charAt(b.length-1))return 0;for(var c=0,d=Math.min(a.length,b.length),e=d,f=0;c<e;)a.substring(a.length-e,a.length-f)==b.substring(b.length-e,b.length-f)?f=c=e:d=e,e=Math.floor((d-c)/2+c);return e}; -diff_match_patch.prototype.diff_commonOverlap_=function(a,b){var c=a.length,d=b.length;if(0==c||0==d)return 0;c>d?a=a.substring(c-d):c<d&&(b=b.substring(0,c));c=Math.min(c,d);if(a==b)return c;d=0;for(var e=1;;){var f=a.substring(c-e);f=b.indexOf(f);if(-1==f)return d;e+=f;if(0==f||a.substring(c-e)==b.substring(0,e))d=e,e++}}; -diff_match_patch.prototype.diff_halfMatch_=function(a,b){function c(a,b,c){for(var d=a.substring(c,c+Math.floor(a.length/4)),e=-1,g="",h,k,l,m;-1!=(e=b.indexOf(d,e+1));){var p=f.diff_commonPrefix(a.substring(c),b.substring(e)),u=f.diff_commonSuffix(a.substring(0,c),b.substring(0,e));g.length<u+p&&(g=b.substring(e-u,e)+b.substring(e,e+p),h=a.substring(0,c-u),k=a.substring(c+p),l=b.substring(0,e-u),m=b.substring(e+p))}return 2*g.length>=a.length?[h,k,l,m,g]:null}if(0>=this.Diff_Timeout)return null; -var d=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>d.length||2*e.length<d.length)return null;var f=this,g=c(d,e,Math.ceil(d.length/4));d=c(d,e,Math.ceil(d.length/2));if(g||d)g=d?g?g[4].length>d[4].length?g:d:d:g;else return null;if(a.length>b.length){d=g[0];e=g[1];var h=g[2];var l=g[3]}else h=g[0],l=g[1],d=g[2],e=g[3];return[d,e,h,l,g[4]]}; -diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=0,h=0,l=0,k=0;f<a.length;)a[f][0]==DIFF_EQUAL?(c[d++]=f,g=l,h=k,k=l=0,e=a[f][1]):(a[f][0]==DIFF_INSERT?l+=a[f][1].length:k+=a[f][1].length,e&&e.length<=Math.max(g,h)&&e.length<=Math.max(l,k)&&(a.splice(c[d-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[c[d-1]+1][0]=DIFF_INSERT,d--,d--,f=0<d?c[d-1]:-1,k=l=h=g=0,e=null,b=!0)),f++;b&&this.diff_cleanupMerge(a);this.diff_cleanupSemanticLossless(a);for(f=1;f< 
-a.length;){if(a[f-1][0]==DIFF_DELETE&&a[f][0]==DIFF_INSERT){b=a[f-1][1];c=a[f][1];d=this.diff_commonOverlap_(b,c);e=this.diff_commonOverlap_(c,b);if(d>=e){if(d>=b.length/2||d>=c.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,c.substring(0,d))),a[f-1][1]=b.substring(0,b.length-d),a[f+1][1]=c.substring(d),f++}else if(e>=b.length/2||e>=c.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,b.substring(0,e))),a[f-1][0]=DIFF_INSERT,a[f-1][1]=c.substring(0,c.length-e),a[f+1][0]=DIFF_DELETE, +diff_match_patch.prototype.diff_main=function(a,b,d,c){"undefined"==typeof c&&(c=0>=this.Diff_Timeout?Number.MAX_VALUE:(new Date).getTime()+1E3*this.Diff_Timeout);if(null==a||null==b)throw Error("Null input. (diff_main)");if(a==b)return a?[new diff_match_patch.Diff(DIFF_EQUAL,a)]:[];"undefined"==typeof d&&(d=!0);var e=d,f=this.diff_commonPrefix(a,b);d=a.substring(0,f);a=a.substring(f);b=b.substring(f);f=this.diff_commonSuffix(a,b);var g=a.substring(a.length-f);a=a.substring(0,a.length-f);b=b.substring(0, +b.length-f);a=this.diff_compute_(a,b,e,c);d&&a.unshift(new diff_match_patch.Diff(DIFF_EQUAL,d));g&&a.push(new diff_match_patch.Diff(DIFF_EQUAL,g));this.diff_cleanupMerge(a);return a}; +diff_match_patch.prototype.diff_compute_=function(a,b,d,c){if(!a)return[new diff_match_patch.Diff(DIFF_INSERT,b)];if(!b)return[new diff_match_patch.Diff(DIFF_DELETE,a)];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);return-1!=g?(d=[new diff_match_patch.Diff(DIFF_INSERT,e.substring(0,g)),new diff_match_patch.Diff(DIFF_EQUAL,f),new diff_match_patch.Diff(DIFF_INSERT,e.substring(g+f.length))],a.length>b.length&&(d[0][0]=d[2][0]=DIFF_DELETE),d):1==f.length?[new diff_match_patch.Diff(DIFF_DELETE, +a),new diff_match_patch.Diff(DIFF_INSERT,b)]:(e=this.diff_halfMatch_(a,b))?(b=e[1],f=e[3],a=e[4],e=this.diff_main(e[0],e[2],d,c),d=this.diff_main(b,f,d,c),e.concat([new 
diff_match_patch.Diff(DIFF_EQUAL,a)],d)):d&&100<a.length&&100<b.length?this.diff_lineMode_(a,b,c):this.diff_bisect_(a,b,c)}; +diff_match_patch.prototype.diff_lineMode_=function(a,b,d){var c=this.diff_linesToChars_(a,b);a=c.chars1;b=c.chars2;c=c.lineArray;a=this.diff_main(a,b,!1,d);this.diff_charsToLines_(a,c);this.diff_cleanupSemantic(a);a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var e=c=b=0,f="",g="";b<a.length;){switch(a[b][0]){case DIFF_INSERT:e++;g+=a[b][1];break;case DIFF_DELETE:c++;f+=a[b][1];break;case DIFF_EQUAL:if(1<=c&&1<=e){a.splice(b-c-e,c+e);b=b-c-e;c=this.diff_main(f,g,!1,d);for(e=c.length-1;0<=e;e--)a.splice(b, +0,c[e]);b+=c.length}c=e=0;g=f=""}b++}a.pop();return a}; +diff_match_patch.prototype.diff_bisect_=function(a,b,d){for(var c=a.length,e=b.length,f=Math.ceil((c+e)/2),g=2*f,h=Array(g),l=Array(g),k=0;k<g;k++)h[k]=-1,l[k]=-1;h[f+1]=0;l[f+1]=0;k=c-e;for(var m=0!=k%2,p=0,x=0,w=0,q=0,t=0;t<f&&!((new Date).getTime()>d);t++){for(var v=-t+p;v<=t-x;v+=2){var n=f+v;var r=v==-t||v!=t&&h[n-1]<h[n+1]?h[n+1]:h[n-1]+1;for(var y=r-v;r<c&&y<e&&a.charAt(r)==b.charAt(y);)r++,y++;h[n]=r;if(r>c)x+=2;else if(y>e)p+=2;else if(m&&(n=f+k-v,0<=n&&n<g&&-1!=l[n])){var u=c-l[n];if(r>= +u)return this.diff_bisectSplit_(a,b,r,y,d)}}for(v=-t+w;v<=t-q;v+=2){n=f+v;u=v==-t||v!=t&&l[n-1]<l[n+1]?l[n+1]:l[n-1]+1;for(r=u-v;u<c&&r<e&&a.charAt(c-u-1)==b.charAt(e-r-1);)u++,r++;l[n]=u;if(u>c)q+=2;else if(r>e)w+=2;else if(!m&&(n=f+k-v,0<=n&&n<g&&-1!=h[n]&&(r=h[n],y=f+r-n,u=c-u,r>=u)))return this.diff_bisectSplit_(a,b,r,y,d)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]}; +diff_match_patch.prototype.diff_bisectSplit_=function(a,b,d,c,e){var f=a.substring(0,d),g=b.substring(0,c);a=a.substring(d);b=b.substring(c);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)}; +diff_match_patch.prototype.diff_linesToChars_=function(a,b){function d(a){for(var 
b="",d=0,g=-1,h=c.length;g<a.length-1;){g=a.indexOf("\n",d);-1==g&&(g=a.length-1);var l=a.substring(d,g+1);(e.hasOwnProperty?e.hasOwnProperty(l):void 0!==e[l])?b+=String.fromCharCode(e[l]):(h==f&&(l=a.substring(d),g=a.length),b+=String.fromCharCode(h),e[l]=h,c[h++]=l);d=g+1}return b}var c=[],e={};c[0]="";var f=4E4,g=d(a);f=65535;var h=d(b);return{chars1:g,chars2:h,lineArray:c}}; +diff_match_patch.prototype.diff_charsToLines_=function(a,b){for(var d=0;d<a.length;d++){for(var c=a[d][1],e=[],f=0;f<c.length;f++)e[f]=b[c.charCodeAt(f)];a[d][1]=e.join("")}};diff_match_patch.prototype.diff_commonPrefix=function(a,b){if(!a||!b||a.charAt(0)!=b.charAt(0))return 0;for(var d=0,c=Math.min(a.length,b.length),e=c,f=0;d<e;)a.substring(f,e)==b.substring(f,e)?f=d=e:c=e,e=Math.floor((c-d)/2+d);return e}; +diff_match_patch.prototype.diff_commonSuffix=function(a,b){if(!a||!b||a.charAt(a.length-1)!=b.charAt(b.length-1))return 0;for(var d=0,c=Math.min(a.length,b.length),e=c,f=0;d<e;)a.substring(a.length-e,a.length-f)==b.substring(b.length-e,b.length-f)?f=d=e:c=e,e=Math.floor((c-d)/2+d);return e}; +diff_match_patch.prototype.diff_commonOverlap_=function(a,b){var d=a.length,c=b.length;if(0==d||0==c)return 0;d>c?a=a.substring(d-c):d<c&&(b=b.substring(0,d));d=Math.min(d,c);if(a==b)return d;c=0;for(var e=1;;){var f=a.substring(d-e);f=b.indexOf(f);if(-1==f)return c;e+=f;if(0==f||a.substring(d-e)==b.substring(0,e))c=e,e++}}; +diff_match_patch.prototype.diff_halfMatch_=function(a,b){function d(a,b,c){for(var d=a.substring(c,c+Math.floor(a.length/4)),e=-1,g="",h,k,l,m;-1!=(e=b.indexOf(d,e+1));){var p=f.diff_commonPrefix(a.substring(c),b.substring(e)),u=f.diff_commonSuffix(a.substring(0,c),b.substring(0,e));g.length<u+p&&(g=b.substring(e-u,e)+b.substring(e,e+p),h=a.substring(0,c-u),k=a.substring(c+p),l=b.substring(0,e-u),m=b.substring(e+p))}return 2*g.length>=a.length?[h,k,l,m,g]:null}if(0>=this.Diff_Timeout)return null; +var 
c=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>c.length||2*e.length<c.length)return null;var f=this,g=d(c,e,Math.ceil(c.length/4));c=d(c,e,Math.ceil(c.length/2));if(g||c)g=c?g?g[4].length>c[4].length?g:c:c:g;else return null;if(a.length>b.length){c=g[0];e=g[1];var h=g[2];var l=g[3]}else h=g[0],l=g[1],c=g[2],e=g[3];return[c,e,h,l,g[4]]}; +diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var b=!1,d=[],c=0,e=null,f=0,g=0,h=0,l=0,k=0;f<a.length;)a[f][0]==DIFF_EQUAL?(d[c++]=f,g=l,h=k,k=l=0,e=a[f][1]):(a[f][0]==DIFF_INSERT?l+=a[f][1].length:k+=a[f][1].length,e&&e.length<=Math.max(g,h)&&e.length<=Math.max(l,k)&&(a.splice(d[c-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[d[c-1]+1][0]=DIFF_INSERT,c--,c--,f=0<c?d[c-1]:-1,k=l=h=g=0,e=null,b=!0)),f++;b&&this.diff_cleanupMerge(a);this.diff_cleanupSemanticLossless(a);for(f=1;f< +a.length;){if(a[f-1][0]==DIFF_DELETE&&a[f][0]==DIFF_INSERT){b=a[f-1][1];d=a[f][1];c=this.diff_commonOverlap_(b,d);e=this.diff_commonOverlap_(d,b);if(c>=e){if(c>=b.length/2||c>=d.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,d.substring(0,c))),a[f-1][1]=b.substring(0,b.length-c),a[f+1][1]=d.substring(c),f++}else if(e>=b.length/2||e>=d.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,b.substring(0,e))),a[f-1][0]=DIFF_INSERT,a[f-1][1]=d.substring(0,d.length-e),a[f+1][0]=DIFF_DELETE, a[f+1][1]=b.substring(e),f++;f++}f++}}; diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(a,b){if(!a||!b)return 6;var c=a.charAt(a.length-1),d=b.charAt(0),e=c.match(diff_match_patch.nonAlphaNumericRegex_),f=d.match(diff_match_patch.nonAlphaNumericRegex_),g=e&&c.match(diff_match_patch.whitespaceRegex_),h=f&&d.match(diff_match_patch.whitespaceRegex_);c=g&&c.match(diff_match_patch.linebreakRegex_);d=h&&d.match(diff_match_patch.linebreakRegex_);var k=c&&a.match(diff_match_patch.blanklineEndRegex_),l=d&&b.match(diff_match_patch.blanklineStartRegex_); -return k||l?5:c||d?4:e&&!g&&h?3:g||h?2:e||f?1:0}for(var 
c=1;c<a.length-1;){if(a[c-1][0]==DIFF_EQUAL&&a[c+1][0]==DIFF_EQUAL){var d=a[c-1][1],e=a[c][1],f=a[c+1][1],g=this.diff_commonSuffix(d,e);if(g){var h=e.substring(e.length-g);d=d.substring(0,d.length-g);e=h+e.substring(0,e.length-g);f=h+f}g=d;h=e;for(var l=f,k=b(d,e)+b(e,f);e.charAt(0)===f.charAt(0);){d+=e.charAt(0);e=e.substring(1)+f.charAt(0);f=f.substring(1);var m=b(d,e)+b(e,f);m>=k&&(k=m,g=d,h=e,l=f)}a[c-1][1]!=g&&(g?a[c-1][1]=g:(a.splice(c- -1,1),c--),a[c][1]=h,l?a[c+1][1]=l:(a.splice(c+1,1),c--))}c++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/; -diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=!1,h=!1,l=!1,k=!1;f<a.length;)a[f][0]==DIFF_EQUAL?(a[f][1].length<this.Diff_EditCost&&(l||k)?(c[d++]=f,g=l,h=k,e=a[f][1]):(d=0,e=null),l=k=!1):(a[f][0]==DIFF_DELETE?k=!0:l=!0,e&&(g&&h&&l&&k||e.length<this.Diff_EditCost/2&&3==g+h+l+k)&&(a.splice(c[d-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[c[d-1]+1][0]=DIFF_INSERT,d--,e=null,g&&h?(l=k=!0,d=0):(d--,f=0<d?c[d-1]:-1,l=k=!1),b=!0)),f++;b&&this.diff_cleanupMerge(a)}; -diff_match_patch.prototype.diff_cleanupMerge=function(a){a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var b=0,c=0,d=0,e="",f="",g;b<a.length;)switch(a[b][0]){case DIFF_INSERT:d++;f+=a[b][1];b++;break;case DIFF_DELETE:c++;e+=a[b][1];b++;break;case DIFF_EQUAL:1<c+d?(0!==c&&0!==d&&(g=this.diff_commonPrefix(f,e),0!==g&&(0<b-c-d&&a[b-c-d-1][0]==DIFF_EQUAL?a[b-c-d-1][1]+=f.substring(0,g):(a.splice(0,0,new diff_match_patch.Diff(DIFF_EQUAL,f.substring(0,g))),b++),f=f.substring(g),e=e.substring(g)), -g=this.diff_commonSuffix(f,e),0!==g&&(a[b][1]=f.substring(f.length-g)+a[b][1],f=f.substring(0,f.length-g),e=e.substring(0,e.length-g))),b-=c+d,a.splice(b,c+d),e.length&&(a.splice(b,0,new 
diff_match_patch.Diff(DIFF_DELETE,e)),b++),f.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_INSERT,f)),b++),b++):0!==b&&a[b-1][0]==DIFF_EQUAL?(a[b-1][1]+=a[b][1],a.splice(b,1)):b++,c=d=0,f=e=""}""===a[a.length-1][1]&&a.pop();c=!1;for(b=1;b<a.length-1;)a[b-1][0]==DIFF_EQUAL&&a[b+1][0]==DIFF_EQUAL&&(a[b][1].substring(a[b][1].length- -a[b-1][1].length)==a[b-1][1]?(a[b][1]=a[b-1][1]+a[b][1].substring(0,a[b][1].length-a[b-1][1].length),a[b+1][1]=a[b-1][1]+a[b+1][1],a.splice(b-1,1),c=!0):a[b][1].substring(0,a[b+1][1].length)==a[b+1][1]&&(a[b-1][1]+=a[b+1][1],a[b][1]=a[b][1].substring(a[b+1][1].length)+a[b+1][1],a.splice(b+1,1),c=!0)),b++;c&&this.diff_cleanupMerge(a)}; -diff_match_patch.prototype.diff_xIndex=function(a,b){var c=0,d=0,e=0,f=0,g;for(g=0;g<a.length;g++){a[g][0]!==DIFF_INSERT&&(c+=a[g][1].length);a[g][0]!==DIFF_DELETE&&(d+=a[g][1].length);if(c>b)break;e=c;f=d}return a.length!=g&&a[g][0]===DIFF_DELETE?f:f+(b-e)}; -diff_match_patch.prototype.diff_prettyHtml=function(a){for(var b=[],c=/&/g,d=/</g,e=/>/g,f=/\n/g,g=0;g<a.length;g++){var h=a[g][0],l=a[g][1].replace(c,"&").replace(d,"<").replace(e,">").replace(f,"¶<br>");switch(h){case DIFF_INSERT:b[g]='<ins style="background:#e6ffe6;">'+l+"</ins>";break;case DIFF_DELETE:b[g]='<del style="background:#ffe6e6;">'+l+"</del>";break;case DIFF_EQUAL:b[g]="<span>"+l+"</span>"}}return b.join("")}; -diff_match_patch.prototype.diff_text1=function(a){for(var b=[],c=0;c<a.length;c++)a[c][0]!==DIFF_INSERT&&(b[c]=a[c][1]);return b.join("")};diff_match_patch.prototype.diff_text2=function(a){for(var b=[],c=0;c<a.length;c++)a[c][0]!==DIFF_DELETE&&(b[c]=a[c][1]);return b.join("")}; -diff_match_patch.prototype.diff_levenshtein=function(a){for(var b=0,c=0,d=0,e=0;e<a.length;e++){var f=a[e][1];switch(a[e][0]){case DIFF_INSERT:c+=f.length;break;case DIFF_DELETE:d+=f.length;break;case DIFF_EQUAL:b+=Math.max(c,d),d=c=0}}return b+=Math.max(c,d)}; -diff_match_patch.prototype.diff_toDelta=function(a){for(var 
b=[],c=0;c<a.length;c++)switch(a[c][0]){case DIFF_INSERT:b[c]="+"+encodeURI(a[c][1]);break;case DIFF_DELETE:b[c]="-"+a[c][1].length;break;case DIFF_EQUAL:b[c]="="+a[c][1].length}return b.join("\t").replace(/%20/g," ")}; -diff_match_patch.prototype.diff_fromDelta=function(a,b){for(var c=[],d=0,e=0,f=b.split(/\t/g),g=0;g<f.length;g++){var h=f[g].substring(1);switch(f[g].charAt(0)){case "+":try{c[d++]=new diff_match_patch.Diff(DIFF_INSERT,decodeURI(h))}catch(k){throw Error("Illegal escape in diff_fromDelta: "+h);}break;case "-":case "=":var l=parseInt(h,10);if(isNaN(l)||0>l)throw Error("Invalid number in diff_fromDelta: "+h);h=a.substring(e,e+=l);"="==f[g].charAt(0)?c[d++]=new diff_match_patch.Diff(DIFF_EQUAL,h):c[d++]= -new diff_match_patch.Diff(DIFF_DELETE,h);break;default:if(f[g])throw Error("Invalid diff operation in diff_fromDelta: "+f[g]);}}if(e!=a.length)throw Error("Delta length ("+e+") does not equal source text length ("+a.length+").");return c};diff_match_patch.prototype.match_main=function(a,b,c){if(null==a||null==b||null==c)throw Error("Null input. 
(match_main)");c=Math.max(0,Math.min(c,a.length));return a==b?0:a.length?a.substring(c,c+b.length)==b?c:this.match_bitap_(a,b,c):-1}; -diff_match_patch.prototype.match_bitap_=function(a,b,c){function d(a,d){var e=a/b.length,g=Math.abs(c-d);return f.Match_Distance?e+g/f.Match_Distance:g?1:e}if(b.length>this.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,c);-1!=h&&(g=Math.min(d(0,h),g),h=a.lastIndexOf(b,c+b.length),-1!=h&&(g=Math.min(d(0,h),g)));var l=1<<b.length-1;h=-1;for(var k,m,p=b.length+a.length,x,w=0;w<b.length;w++){k=0;for(m=p;k<m;)d(w, -c+m)<=g?k=m:p=m,m=Math.floor((p-k)/2+k);p=m;k=Math.max(1,c-m+1);var q=Math.min(c+m,a.length)+b.length;m=Array(q+2);for(m[q+1]=(1<<w)-1;q>=k;q--){var t=e[a.charAt(q-1)];m[q]=0===w?(m[q+1]<<1|1)&t:(m[q+1]<<1|1)&t|(x[q+1]|x[q])<<1|1|x[q+1];if(m[q]&l&&(t=d(w,q-1),t<=g))if(g=t,h=q-1,h>c)k=Math.max(1,2*c-h);else break}if(d(w+1,c)>g)break;x=m}return h}; -diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},c=0;c<a.length;c++)b[a.charAt(c)]=0;for(c=0;c<a.length;c++)b[a.charAt(c)]|=1<<a.length-c-1;return b}; -diff_match_patch.prototype.patch_addContext_=function(a,b){if(0!=b.length){if(null===a.start2)throw Error("patch not initialized");for(var c=b.substring(a.start2,a.start2+a.length1),d=0;b.indexOf(c)!=b.lastIndexOf(c)&&c.length<this.Match_MaxBits-this.Patch_Margin-this.Patch_Margin;)d+=this.Patch_Margin,c=b.substring(a.start2-d,a.start2+a.length1+d);d+=this.Patch_Margin;(c=b.substring(a.start2-d,a.start2))&&a.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL,c));(d=b.substring(a.start2+a.length1, -a.start2+a.length1+d))&&a.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,d));a.start1-=c.length;a.start2-=c.length;a.length1+=c.length+d.length;a.length2+=c.length+d.length}}; -diff_match_patch.prototype.patch_make=function(a,b,c){if("string"==typeof a&&"string"==typeof b&&"undefined"==typeof c){var 
d=a;b=this.diff_main(d,b,!0);2<b.length&&(this.diff_cleanupSemantic(b),this.diff_cleanupEfficiency(b))}else if(a&&"object"==typeof a&&"undefined"==typeof b&&"undefined"==typeof c)b=a,d=this.diff_text1(b);else if("string"==typeof a&&b&&"object"==typeof b&&"undefined"==typeof c)d=a;else if("string"==typeof a&&"string"==typeof b&&c&&"object"==typeof c)d=a,b=c;else throw Error("Unknown call format to patch_make."); -if(0===b.length)return[];c=[];a=new diff_match_patch.patch_obj;for(var e=0,f=0,g=0,h=d,l=0;l<b.length;l++){var k=b[l][0],m=b[l][1];e||k===DIFF_EQUAL||(a.start1=f,a.start2=g);switch(k){case DIFF_INSERT:a.diffs[e++]=b[l];a.length2+=m.length;d=d.substring(0,g)+m+d.substring(g);break;case DIFF_DELETE:a.length1+=m.length;a.diffs[e++]=b[l];d=d.substring(0,g)+d.substring(g+m.length);break;case DIFF_EQUAL:m.length<=2*this.Patch_Margin&&e&&b.length!=l+1?(a.diffs[e++]=b[l],a.length1+=m.length,a.length2+=m.length): -m.length>=2*this.Patch_Margin&&e&&(this.patch_addContext_(a,h),c.push(a),a=new diff_match_patch.patch_obj,e=0,h=d,f=g)}k!==DIFF_INSERT&&(f+=m.length);k!==DIFF_DELETE&&(g+=m.length)}e&&(this.patch_addContext_(a,h),c.push(a));return c}; -diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],c=0;c<a.length;c++){var d=a[c],e=new diff_match_patch.patch_obj;e.diffs=[];for(var f=0;f<d.diffs.length;f++)e.diffs[f]=new diff_match_patch.Diff(d.diffs[f][0],d.diffs[f][1]);e.start1=d.start1;e.start2=d.start2;e.length1=d.length1;e.length2=d.length2;b[c]=e}return b}; -diff_match_patch.prototype.patch_apply=function(a,b){if(0==a.length)return[b,[]];a=this.patch_deepCopy(a);var c=this.patch_addPadding(a);b=c+b+c;this.patch_splitMax(a);for(var d=0,e=[],f=0;f<a.length;f++){var g=a[f].start2+d,h=this.diff_text1(a[f].diffs),l=-1;if(h.length>this.Match_MaxBits){var k=this.match_main(b,h.substring(0,this.Match_MaxBits),g);-1!=k&&(l=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==l||k>=l)&&(k=-1)}else 
k=this.match_main(b,h, -g);if(-1==k)e[f]=!1,d-=a[f].length2-a[f].length1;else if(e[f]=!0,d=k-g,g=-1==l?b.substring(k,k+h.length):b.substring(k,l+this.Match_MaxBits),h==g)b=b.substring(0,k)+this.diff_text2(a[f].diffs)+b.substring(k+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);h=0;var m;for(l=0;l<a[f].diffs.length;l++){var p=a[f].diffs[l];p[0]!==DIFF_EQUAL&&(m=this.diff_xIndex(g,h));p[0]=== -DIFF_INSERT?b=b.substring(0,k+m)+p[1]+b.substring(k+m):p[0]===DIFF_DELETE&&(b=b.substring(0,k+m)+b.substring(k+this.diff_xIndex(g,h+p[1].length)));p[0]!==DIFF_DELETE&&(h+=p[1].length)}}}b=b.substring(c.length,b.length-c.length);return[b,e]}; -diff_match_patch.prototype.patch_addPadding=function(a){for(var b=this.Patch_Margin,c="",d=1;d<=b;d++)c+=String.fromCharCode(d);for(d=0;d<a.length;d++)a[d].start1+=b,a[d].start2+=b;d=a[0];var e=d.diffs;if(0==e.length||e[0][0]!=DIFF_EQUAL)e.unshift(new diff_match_patch.Diff(DIFF_EQUAL,c)),d.start1-=b,d.start2-=b,d.length1+=b,d.length2+=b;else if(b>e[0][1].length){var f=b-e[0][1].length;e[0][1]=c.substring(e[0][1].length)+e[0][1];d.start1-=f;d.start2-=f;d.length1+=f;d.length2+=f}d=a[a.length-1];e=d.diffs; -0==e.length||e[e.length-1][0]!=DIFF_EQUAL?(e.push(new diff_match_patch.Diff(DIFF_EQUAL,c)),d.length1+=b,d.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=c.substring(0,f),d.length1+=f,d.length2+=f);return c}; -diff_match_patch.prototype.patch_splitMax=function(a){for(var b=this.Match_MaxBits,c=0;c<a.length;c++)if(!(a[c].length1<=b)){var d=a[c];a.splice(c--,1);for(var e=d.start1,f=d.start2,g="";0!==d.diffs.length;){var h=new diff_match_patch.patch_obj,l=!0;h.start1=e-g.length;h.start2=f-g.length;""!==g&&(h.length1=h.length2=g.length,h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g)));for(;0!==d.diffs.length&&h.length1<b-this.Patch_Margin;){g=d.diffs[0][0];var 
k=d.diffs[0][1];g===DIFF_INSERT?(h.length2+= -k.length,f+=k.length,h.diffs.push(d.diffs.shift()),l=!1):g===DIFF_DELETE&&1==h.diffs.length&&h.diffs[0][0]==DIFF_EQUAL&&k.length>2*b?(h.length1+=k.length,e+=k.length,l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),d.diffs.shift()):(k=k.substring(0,b-h.length1-this.Patch_Margin),h.length1+=k.length,e+=k.length,g===DIFF_EQUAL?(h.length2+=k.length,f+=k.length):l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),k==d.diffs[0][1]?d.diffs.shift():d.diffs[0][1]=d.diffs[0][1].substring(k.length))}g=this.diff_text2(h.diffs); -g=g.substring(g.length-this.Patch_Margin);k=this.diff_text1(d.diffs).substring(0,this.Patch_Margin);""!==k&&(h.length1+=k.length,h.length2+=k.length,0!==h.diffs.length&&h.diffs[h.diffs.length-1][0]===DIFF_EQUAL?h.diffs[h.diffs.length-1][1]+=k:h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,k)));l||a.splice(++c,0,h)}}};diff_match_patch.prototype.patch_toText=function(a){for(var b=[],c=0;c<a.length;c++)b[c]=a[c];return b.join("")}; -diff_match_patch.prototype.patch_fromText=function(a){var b=[];if(!a)return b;a=a.split("\n");for(var c=0,d=/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/;c<a.length;){var e=a[c].match(d);if(!e)throw Error("Invalid patch string: "+a[c]);var f=new diff_match_patch.patch_obj;b.push(f);f.start1=parseInt(e[1],10);""===e[2]?(f.start1--,f.length1=1):"0"==e[2]?f.length1=0:(f.start1--,f.length1=parseInt(e[2],10));f.start2=parseInt(e[3],10);""===e[4]?(f.start2--,f.length2=1):"0"==e[4]?f.length2=0:(f.start2--,f.length2= -parseInt(e[4],10));for(c++;c<a.length;){e=a[c].charAt(0);try{var g=decodeURI(a[c].substring(1))}catch(h){throw Error("Illegal escape in patch_fromText: "+g);}if("-"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_DELETE,g));else if("+"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_INSERT,g));else if(" "==e)f.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g));else if("@"==e)break;else if(""!==e)throw Error('Invalid patch mode "'+e+'" in: '+g);c++}}return b}; +return 
k||l?5:c||d?4:e&&!g&&h?3:g||h?2:e||f?1:0}for(var d=1;d<a.length-1;){if(a[d-1][0]==DIFF_EQUAL&&a[d+1][0]==DIFF_EQUAL){var c=a[d-1][1],e=a[d][1],f=a[d+1][1],g=this.diff_commonSuffix(c,e);if(g){var h=e.substring(e.length-g);c=c.substring(0,c.length-g);e=h+e.substring(0,e.length-g);f=h+f}g=c;h=e;for(var l=f,k=b(c,e)+b(e,f);e.charAt(0)===f.charAt(0);){c+=e.charAt(0);e=e.substring(1)+f.charAt(0);f=f.substring(1);var m=b(c,e)+b(e,f);m>=k&&(k=m,g=c,h=e,l=f)}a[d-1][1]!=g&&(g?a[d-1][1]=g:(a.splice(d- +1,1),d--),a[d][1]=h,l?a[d+1][1]=l:(a.splice(d+1,1),d--))}d++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/; +diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,d=[],c=0,e=null,f=0,g=!1,h=!1,l=!1,k=!1;f<a.length;)a[f][0]==DIFF_EQUAL?(a[f][1].length<this.Diff_EditCost&&(l||k)?(d[c++]=f,g=l,h=k,e=a[f][1]):(c=0,e=null),l=k=!1):(a[f][0]==DIFF_DELETE?k=!0:l=!0,e&&(g&&h&&l&&k||e.length<this.Diff_EditCost/2&&3==g+h+l+k)&&(a.splice(d[c-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[d[c-1]+1][0]=DIFF_INSERT,c--,e=null,g&&h?(l=k=!0,c=0):(c--,f=0<c?d[c-1]:-1,l=k=!1),b=!0)),f++;b&&this.diff_cleanupMerge(a)}; +diff_match_patch.prototype.diff_cleanupMerge=function(a){a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var b=0,d=0,c=0,e="",f="",g;b<a.length;)switch(a[b][0]){case DIFF_INSERT:c++;f+=a[b][1];b++;break;case DIFF_DELETE:d++;e+=a[b][1];b++;break;case DIFF_EQUAL:1<d+c?(0!==d&&0!==c&&(g=this.diff_commonPrefix(f,e),0!==g&&(0<b-d-c&&a[b-d-c-1][0]==DIFF_EQUAL?a[b-d-c-1][1]+=f.substring(0,g):(a.splice(0,0,new diff_match_patch.Diff(DIFF_EQUAL,f.substring(0,g))),b++),f=f.substring(g),e=e.substring(g)), 
+g=this.diff_commonSuffix(f,e),0!==g&&(a[b][1]=f.substring(f.length-g)+a[b][1],f=f.substring(0,f.length-g),e=e.substring(0,e.length-g))),b-=d+c,a.splice(b,d+c),e.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_DELETE,e)),b++),f.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_INSERT,f)),b++),b++):0!==b&&a[b-1][0]==DIFF_EQUAL?(a[b-1][1]+=a[b][1],a.splice(b,1)):b++,d=c=0,f=e=""}""===a[a.length-1][1]&&a.pop();d=!1;for(b=1;b<a.length-1;)a[b-1][0]==DIFF_EQUAL&&a[b+1][0]==DIFF_EQUAL&&(a[b][1].substring(a[b][1].length- +a[b-1][1].length)==a[b-1][1]?(a[b][1]=a[b-1][1]+a[b][1].substring(0,a[b][1].length-a[b-1][1].length),a[b+1][1]=a[b-1][1]+a[b+1][1],a.splice(b-1,1),d=!0):a[b][1].substring(0,a[b+1][1].length)==a[b+1][1]&&(a[b-1][1]+=a[b+1][1],a[b][1]=a[b][1].substring(a[b+1][1].length)+a[b+1][1],a.splice(b+1,1),d=!0)),b++;d&&this.diff_cleanupMerge(a)}; +diff_match_patch.prototype.diff_xIndex=function(a,b){var d=0,c=0,e=0,f=0,g;for(g=0;g<a.length;g++){a[g][0]!==DIFF_INSERT&&(d+=a[g][1].length);a[g][0]!==DIFF_DELETE&&(c+=a[g][1].length);if(d>b)break;e=d;f=c}return a.length!=g&&a[g][0]===DIFF_DELETE?f:f+(b-e)}; +diff_match_patch.prototype.diff_prettyHtml=function(a){for(var b=[],d=/&/g,c=/</g,e=/>/g,f=/\n/g,g=0;g<a.length;g++){var h=a[g][0],l=a[g][1].replace(d,"&").replace(c,"<").replace(e,">").replace(f,"¶<br>");switch(h){case DIFF_INSERT:b[g]='<ins style="background:#e6ffe6;">'+l+"</ins>";break;case DIFF_DELETE:b[g]='<del style="background:#ffe6e6;">'+l+"</del>";break;case DIFF_EQUAL:b[g]="<span>"+l+"</span>"}}return b.join("")}; +diff_match_patch.prototype.diff_text1=function(a){for(var b=[],d=0;d<a.length;d++)a[d][0]!==DIFF_INSERT&&(b[d]=a[d][1]);return b.join("")};diff_match_patch.prototype.diff_text2=function(a){for(var b=[],d=0;d<a.length;d++)a[d][0]!==DIFF_DELETE&&(b[d]=a[d][1]);return b.join("")}; +diff_match_patch.prototype.diff_levenshtein=function(a){for(var b=0,d=0,c=0,e=0;e<a.length;e++){var f=a[e][1];switch(a[e][0]){case 
DIFF_INSERT:d+=f.length;break;case DIFF_DELETE:c+=f.length;break;case DIFF_EQUAL:b+=Math.max(d,c),c=d=0}}return b+=Math.max(d,c)};diff_match_patch.prototype.isHighSurrogate=function(a){a=a.charCodeAt(0);return 55296<=a&&56319>=a};diff_match_patch.prototype.isLowSurrogate=function(a){a=a.charCodeAt(0);return 56320<=a&&57343>=a}; +diff_match_patch.prototype.diff_toDelta=function(a){for(var b=[],d,c=0;c<a.length;c++){var e=a[c],f=e[1][0],g=e[1][e[1].length-1];if(0!==e[1].length&&(g&&this.isHighSurrogate(g)&&(d=g,e[1]=e[1].slice(0,-1)),d&&f&&this.isHighSurrogate(d)&&this.isLowSurrogate(f)&&(e[1]=d+e[1]),0!==e[1].length))switch(e[0]){case DIFF_INSERT:b.push("+"+encodeURI(e[1]));break;case DIFF_DELETE:b.push("-"+e[1].length);break;case DIFF_EQUAL:b.push("="+e[1].length)}}return b.join("\t").replace(/%20/g," ")}; +diff_match_patch.prototype.digit16=function(a){switch(a){case "0":return 0;case "1":return 1;case "2":return 2;case "3":return 3;case "4":return 4;case "5":return 5;case "6":return 6;case "7":return 7;case "8":return 8;case "9":return 9;case "A":case "a":return 10;case "B":case "b":return 11;case "C":case "c":return 12;case "D":case "d":return 13;case "E":case "e":return 14;case "F":case "f":return 15;default:throw Error("Invalid hex-code");}}; +diff_match_patch.prototype.decodeURI=function(a){try{return decodeURI(a)}catch(h){for(var b=0,d="";b<a.length;)if("%"!==a[b])d+=a[b++];else{var c=(this.digit16(a[b+1])<<4)+this.digit16(a[b+2]);if(0===(c&128))d+=String.fromCharCode(c),b+=3;else{if("%"!==a[b+3])throw new URIError("URI malformed");var e=(this.digit16(a[b+4])<<4)+this.digit16(a[b+5]);if(128!==(e&192))throw new URIError("URI malformed");e&=63;if(192===(c&224))d+=String.fromCharCode((c&31)<<6|e),b+=6;else{if("%"!==a[b+6])throw new URIError("URI malformed"); +var f=(this.digit16(a[b+7])<<4)+this.digit16(a[b+8]);if(128!==(f&192))throw new URIError("URI malformed");f&=63;if(224===(c&240))d+=String.fromCharCode((c&15)<<12|e<<6|f),b+=9;else{if("%"!==a[b+9])throw 
new URIError("URI malformed");var g=(this.digit16(a[b+10])<<4)+this.digit16(a[b+11]);if(128!==(g&192))throw new URIError("URI malformed");g&=63;if(240===(c&248)&&(c=(c&7)<<18|e<<12|f<<6|g,65536<=c&&1114111>=c)){d+=String.fromCharCode((c&65535)>>>10&1023|55296);d+=String.fromCharCode(56320|c&1023); +b+=12;continue}throw new URIError("URI malformed");}}}}return d}}; +diff_match_patch.prototype.diff_fromDelta=function(a,b){for(var d=[],c=0,e=0,f=b.split(/\t/g),g=0;g<f.length;g++){var h=f[g].substring(1);switch(f[g].charAt(0)){case "+":try{d[c++]=new diff_match_patch.Diff(DIFF_INSERT,this.decodeURI(h))}catch(k){throw Error("Illegal escape in diff_fromDelta: "+h);}break;case "-":case "=":var l=parseInt(h,10);if(isNaN(l)||0>l)throw Error("Invalid number in diff_fromDelta: "+h);h=a.substring(e,e+=l);"="==f[g].charAt(0)?d[c++]=new diff_match_patch.Diff(DIFF_EQUAL,h): +d[c++]=new diff_match_patch.Diff(DIFF_DELETE,h);break;default:if(f[g])throw Error("Invalid diff operation in diff_fromDelta: "+f[g]);}}if(e!=a.length)throw Error("Delta length ("+e+") does not equal source text length ("+a.length+").");return d};diff_match_patch.prototype.match_main=function(a,b,d){if(null==a||null==b||null==d)throw Error("Null input. 
(match_main)");d=Math.max(0,Math.min(d,a.length));return a==b?0:a.length?a.substring(d,d+b.length)==b?d:this.match_bitap_(a,b,d):-1}; +diff_match_patch.prototype.match_bitap_=function(a,b,d){function c(a,c){var e=a/b.length,g=Math.abs(d-c);return f.Match_Distance?e+g/f.Match_Distance:g?1:e}if(b.length>this.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,d);-1!=h&&(g=Math.min(c(0,h),g),h=a.lastIndexOf(b,d+b.length),-1!=h&&(g=Math.min(c(0,h),g)));var l=1<<b.length-1;h=-1;for(var k,m,p=b.length+a.length,x,w=0;w<b.length;w++){k=0;for(m=p;k<m;)c(w, +d+m)<=g?k=m:p=m,m=Math.floor((p-k)/2+k);p=m;k=Math.max(1,d-m+1);var q=Math.min(d+m,a.length)+b.length;m=Array(q+2);for(m[q+1]=(1<<w)-1;q>=k;q--){var t=e[a.charAt(q-1)];m[q]=0===w?(m[q+1]<<1|1)&t:(m[q+1]<<1|1)&t|(x[q+1]|x[q])<<1|1|x[q+1];if(m[q]&l&&(t=c(w,q-1),t<=g))if(g=t,h=q-1,h>d)k=Math.max(1,2*d-h);else break}if(c(w+1,d)>g)break;x=m}return h}; +diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},d=0;d<a.length;d++)b[a.charAt(d)]=0;for(d=0;d<a.length;d++)b[a.charAt(d)]|=1<<a.length-d-1;return b}; +diff_match_patch.prototype.patch_addContext_=function(a,b){if(0!=b.length){if(null===a.start2)throw Error("patch not initialized");for(var d=b.substring(a.start2,a.start2+a.length1),c=0;b.indexOf(d)!=b.lastIndexOf(d)&&d.length<this.Match_MaxBits-this.Patch_Margin-this.Patch_Margin;)c+=this.Patch_Margin,d=b.substring(a.start2-c,a.start2+a.length1+c);c+=this.Patch_Margin;(d=b.substring(a.start2-c,a.start2))&&a.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL,d));(c=b.substring(a.start2+a.length1, +a.start2+a.length1+c))&&a.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,c));a.start1-=d.length;a.start2-=d.length;a.length1+=d.length+c.length;a.length2+=d.length+c.length}}; +diff_match_patch.prototype.patch_make=function(a,b,d){if("string"==typeof a&&"string"==typeof b&&"undefined"==typeof d){var 
c=a;b=this.diff_main(c,b,!0);2<b.length&&(this.diff_cleanupSemantic(b),this.diff_cleanupEfficiency(b))}else if(a&&"object"==typeof a&&"undefined"==typeof b&&"undefined"==typeof d)b=a,c=this.diff_text1(b);else if("string"==typeof a&&b&&"object"==typeof b&&"undefined"==typeof d)c=a;else if("string"==typeof a&&"string"==typeof b&&d&&"object"==typeof d)c=a,b=d;else throw Error("Unknown call format to patch_make."); +if(0===b.length)return[];d=[];a=new diff_match_patch.patch_obj;for(var e=0,f=0,g=0,h=c,l=0;l<b.length;l++){var k=b[l][0],m=b[l][1];e||k===DIFF_EQUAL||(a.start1=f,a.start2=g);switch(k){case DIFF_INSERT:a.diffs[e++]=b[l];a.length2+=m.length;c=c.substring(0,g)+m+c.substring(g);break;case DIFF_DELETE:a.length1+=m.length;a.diffs[e++]=b[l];c=c.substring(0,g)+c.substring(g+m.length);break;case DIFF_EQUAL:m.length<=2*this.Patch_Margin&&e&&b.length!=l+1?(a.diffs[e++]=b[l],a.length1+=m.length,a.length2+=m.length): +m.length>=2*this.Patch_Margin&&e&&(this.patch_addContext_(a,h),d.push(a),a=new diff_match_patch.patch_obj,e=0,h=c,f=g)}k!==DIFF_INSERT&&(f+=m.length);k!==DIFF_DELETE&&(g+=m.length)}e&&(this.patch_addContext_(a,h),d.push(a));return d}; +diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],d=0;d<a.length;d++){var c=a[d],e=new diff_match_patch.patch_obj;e.diffs=[];for(var f=0;f<c.diffs.length;f++)e.diffs[f]=new diff_match_patch.Diff(c.diffs[f][0],c.diffs[f][1]);e.start1=c.start1;e.start2=c.start2;e.length1=c.length1;e.length2=c.length2;b[d]=e}return b}; +diff_match_patch.prototype.patch_apply=function(a,b){if(0==a.length)return[b,[]];a=this.patch_deepCopy(a);var d=this.patch_addPadding(a);b=d+b+d;this.patch_splitMax(a);for(var c=0,e=[],f=0;f<a.length;f++){var g=a[f].start2+c,h=this.diff_text1(a[f].diffs),l=-1;if(h.length>this.Match_MaxBits){var k=this.match_main(b,h.substring(0,this.Match_MaxBits),g);-1!=k&&(l=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==l||k>=l)&&(k=-1)}else 
k=this.match_main(b,h, +g);if(-1==k)e[f]=!1,c-=a[f].length2-a[f].length1;else if(e[f]=!0,c=k-g,g=-1==l?b.substring(k,k+h.length):b.substring(k,l+this.Match_MaxBits),h==g)b=b.substring(0,k)+this.diff_text2(a[f].diffs)+b.substring(k+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);h=0;var m;for(l=0;l<a[f].diffs.length;l++){var p=a[f].diffs[l];p[0]!==DIFF_EQUAL&&(m=this.diff_xIndex(g,h));p[0]=== +DIFF_INSERT?b=b.substring(0,k+m)+p[1]+b.substring(k+m):p[0]===DIFF_DELETE&&(b=b.substring(0,k+m)+b.substring(k+this.diff_xIndex(g,h+p[1].length)));p[0]!==DIFF_DELETE&&(h+=p[1].length)}}}b=b.substring(d.length,b.length-d.length);return[b,e]}; +diff_match_patch.prototype.patch_addPadding=function(a){for(var b=this.Patch_Margin,d="",c=1;c<=b;c++)d+=String.fromCharCode(c);for(c=0;c<a.length;c++)a[c].start1+=b,a[c].start2+=b;c=a[0];var e=c.diffs;if(0==e.length||e[0][0]!=DIFF_EQUAL)e.unshift(new diff_match_patch.Diff(DIFF_EQUAL,d)),c.start1-=b,c.start2-=b,c.length1+=b,c.length2+=b;else if(b>e[0][1].length){var f=b-e[0][1].length;e[0][1]=d.substring(e[0][1].length)+e[0][1];c.start1-=f;c.start2-=f;c.length1+=f;c.length2+=f}c=a[a.length-1];e=c.diffs; +0==e.length||e[e.length-1][0]!=DIFF_EQUAL?(e.push(new diff_match_patch.Diff(DIFF_EQUAL,d)),c.length1+=b,c.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=d.substring(0,f),c.length1+=f,c.length2+=f);return d}; +diff_match_patch.prototype.patch_splitMax=function(a){for(var b=this.Match_MaxBits,d=0;d<a.length;d++)if(!(a[d].length1<=b)){var c=a[d];a.splice(d--,1);for(var e=c.start1,f=c.start2,g="";0!==c.diffs.length;){var h=new diff_match_patch.patch_obj,l=!0;h.start1=e-g.length;h.start2=f-g.length;""!==g&&(h.length1=h.length2=g.length,h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g)));for(;0!==c.diffs.length&&h.length1<b-this.Patch_Margin;){g=c.diffs[0][0];var 
k=c.diffs[0][1];g===DIFF_INSERT?(h.length2+= +k.length,f+=k.length,h.diffs.push(c.diffs.shift()),l=!1):g===DIFF_DELETE&&1==h.diffs.length&&h.diffs[0][0]==DIFF_EQUAL&&k.length>2*b?(h.length1+=k.length,e+=k.length,l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),c.diffs.shift()):(k=k.substring(0,b-h.length1-this.Patch_Margin),h.length1+=k.length,e+=k.length,g===DIFF_EQUAL?(h.length2+=k.length,f+=k.length):l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),k==c.diffs[0][1]?c.diffs.shift():c.diffs[0][1]=c.diffs[0][1].substring(k.length))}g=this.diff_text2(h.diffs); +g=g.substring(g.length-this.Patch_Margin);k=this.diff_text1(c.diffs).substring(0,this.Patch_Margin);""!==k&&(h.length1+=k.length,h.length2+=k.length,0!==h.diffs.length&&h.diffs[h.diffs.length-1][0]===DIFF_EQUAL?h.diffs[h.diffs.length-1][1]+=k:h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,k)));l||a.splice(++d,0,h)}}};diff_match_patch.prototype.patch_toText=function(a){for(var b=[],d=0;d<a.length;d++)b[d]=a[d];return b.join("")}; +diff_match_patch.prototype.patch_fromText=function(a){var b=[];if(!a)return b;a=a.split("\n");for(var d=0,c=/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/;d<a.length;){var e=a[d].match(c);if(!e)throw Error("Invalid patch string: "+a[d]);var f=new diff_match_patch.patch_obj;b.push(f);f.start1=parseInt(e[1],10);""===e[2]?(f.start1--,f.length1=1):"0"==e[2]?f.length1=0:(f.start1--,f.length1=parseInt(e[2],10));f.start2=parseInt(e[3],10);""===e[4]?(f.start2--,f.length2=1):"0"==e[4]?f.length2=0:(f.start2--,f.length2= +parseInt(e[4],10));for(d++;d<a.length;){e=a[d].charAt(0);try{var g=decodeURI(a[d].substring(1))}catch(h){throw Error("Illegal escape in patch_fromText: "+g);}if("-"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_DELETE,g));else if("+"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_INSERT,g));else if(" "==e)f.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g));else if("@"==e)break;else if(""!==e)throw Error('Invalid patch mode "'+e+'" in: '+g);d++}}return b}; 
diff_match_patch.patch_obj=function(){this.diffs=[];this.start2=this.start1=null;this.length2=this.length1=0}; -diff_match_patch.patch_obj.prototype.toString=function(){for(var a=["@@ -"+(0===this.length1?this.start1+",0":1==this.length1?this.start1+1:this.start1+1+","+this.length1)+" +"+(0===this.length2?this.start2+",0":1==this.length2?this.start2+1:this.start2+1+","+this.length2)+" @@\n"],b,c=0;c<this.diffs.length;c++){switch(this.diffs[c][0]){case DIFF_INSERT:b="+";break;case DIFF_DELETE:b="-";break;case DIFF_EQUAL:b=" "}a[c+1]=b+encodeURI(this.diffs[c][1])+"\n"}return a.join("").replace(/%20/g," ")}; -this.diff_match_patch=diff_match_patch;this.DIFF_DELETE=DIFF_DELETE;this.DIFF_INSERT=DIFF_INSERT;this.DIFF_EQUAL=DIFF_EQUAL; +diff_match_patch.patch_obj.prototype.toString=function(){for(var a=["@@ -"+(0===this.length1?this.start1+",0":1==this.length1?this.start1+1:this.start1+1+","+this.length1)+" +"+(0===this.length2?this.start2+",0":1==this.length2?this.start2+1:this.start2+1+","+this.length2)+" @@\n"],b,d=0;d<this.diffs.length;d++){switch(this.diffs[d][0]){case DIFF_INSERT:b="+";break;case DIFF_DELETE:b="-";break;case DIFF_EQUAL:b=" "}a[d+1]=b+encodeURI(this.diffs[d][1])+"\n"}return a.join("").replace(/%20/g," ")}; +this.diff_match_patch=diff_match_patch;this.DIFF_DELETE=DIFF_DELETE;this.DIFF_INSERT=DIFF_INSERT;this.DIFF_EQUAL=DIFF_EQUAL; \ No newline at end of file diff --git a/javascript/diff_match_patch_uncompressed.js b/javascript/diff_match_patch_uncompressed.js index 88a702c2..9eeeada6 100644 --- a/javascript/diff_match_patch_uncompressed.js +++ b/javascript/diff_match_patch_uncompressed.js @@ -1339,6 +1339,23 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) { return levenshtein; }; +diff_match_patch.prototype.isHighSurrogate = function(c) { + if(typeof c !== 'string'){ + return false + } + + var v = c.charCodeAt(0); + return v >= 0xD800 && v <= 0xDBFF; +} + +diff_match_patch.prototype.isLowSurrogate = function(c) { + if(typeof c !== 
'string'){ + return false + } + + var v = c.charCodeAt(0); + return v >= 0xDC00 && v <= 0xDFFF; +} /** * Crush the diff into an encoded string which describes the operations @@ -1350,22 +1367,161 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) { */ diff_match_patch.prototype.diff_toDelta = function(diffs) { var text = []; + var lastEnd; for (var x = 0; x < diffs.length; x++) { - switch (diffs[x][0]) { + var thisDiff = diffs[x]; + var thisTop = thisDiff[1][0]; + var thisEnd = thisDiff[1][thisDiff[1].length - 1]; + + if (0 === thisDiff[1].length) { + continue; + } + + // trap a trailing high-surrogate so we can + // distribute it to the successive edits + if (thisEnd && this.isHighSurrogate(thisEnd)) { + lastEnd = thisEnd; + thisDiff[1] = thisDiff[1].slice(0, -1); + } + + if (lastEnd && thisTop && this.isHighSurrogate(lastEnd) && this.isLowSurrogate(thisTop)) { + thisDiff[1] = lastEnd + thisDiff[1]; + } + + if (0 === thisDiff[1].length) { + continue; + } + + switch (thisDiff[0]) { case DIFF_INSERT: - text[x] = '+' + encodeURI(diffs[x][1]); + text.push('+' + encodeURI(thisDiff[1])); break; case DIFF_DELETE: - text[x] = '-' + diffs[x][1].length; + text.push('-' + thisDiff[1].length); break; case DIFF_EQUAL: - text[x] = '=' + diffs[x][1].length; + text.push('=' + thisDiff[1].length); break; } } return text.join('\t').replace(/%20/g, ' '); }; +diff_match_patch.prototype.digit16 = function(c) { + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; + default: throw new Error('Invalid hex-code'); + } +}; + +/** + * Decode URI-encoded string but allow for encoded surrogate halves + * + 
* diff_match_patch needs this relaxation of the requirements because + * not all libraries and versions produce valid URI strings in toDelta + * and we don't want to crash this code when the input is valid input + * but at the same time invalid utf-8 + * + * @example: decodeURI( 'abcd%3A %F0%9F%85%B0' ) = 'abcd: \ud83c\udd70' + * @example: decodeURI( 'abcd%3A %ED%A0%BC' ) = 'abcd: \ud83c' + * + * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js + * + * @param {String} text input string encoded by encodeURI() or equivalent + * @return {String} + */ +diff_match_patch.prototype.decodeURI = function(text) { + try { + return decodeURI(text); + } catch ( e ) { + var i = 0; + var decoded = ''; + + while (i < text.length) { + if ( text[i] !== '%' ) { + decoded += text[i++]; + continue; + } + + // start a percent-sequence + var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]); + if ((byte1 & 0x80) === 0) { + decoded += String.fromCharCode(byte1); + i += 3; + continue; + } + + if ('%' !== text[i + 3]) { + throw new URIError('URI malformed'); + } + + var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]); + if ((byte2 & 0xC0) !== 0x80) { + throw new URIError('URI malformed'); + } + byte2 = byte2 & 0x3F; + if ((byte1 & 0xE0) === 0xC0) { + decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2); + i += 6; + continue; + } + + if ('%' !== text[i + 6]) { + throw new URIError('URI malformed'); + } + + var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]); + if ((byte3 & 0xC0) !== 0x80) { + throw new URIError('URI malformed'); + } + byte3 = byte3 & 0x3F; + if ((byte1 & 0xF0) === 0xE0) { + // unpaired surrogate are fine here + decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3); + i += 9; + continue; + } + + if ('%' !== text[i + 9]) { + throw new URIError('URI malformed'); + } + + var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]); + if ((byte4 & 
0xC0) !== 0x80) { + throw new URIError('URI malformed'); + } + byte4 = byte4 & 0x3F; + if ((byte1 & 0xF8) === 0xF0) { + var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4; + if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) { + decoded += String.fromCharCode((codePoint & 0xFFFF) >>> 10 & 0x3FF | 0xD800); + decoded += String.fromCharCode(0xDC00 | (codePoint & 0xFFFF) & 0x3FF); + i += 12; + continue; + } + } + + throw new URIError('URI malformed'); + } + + return decoded; + } +}; /** * Given the original text1, and an encoded string which describes the @@ -1388,7 +1544,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) { case '+': try { diffs[diffsLength++] = - new diff_match_patch.Diff(DIFF_INSERT, decodeURI(param)); + new diff_match_patch.Diff(DIFF_INSERT, this.decodeURI(param)); } catch (ex) { // Malformed URI sequence. throw new Error('Illegal escape in diff_fromDelta: ' + param); @@ -1626,14 +1782,22 @@ diff_match_patch.prototype.patch_addContext_ = function(patch, text) { // Add one chunk for good luck. padding += this.Patch_Margin; + + // Add the prefix. - var prefix = text.substring(patch.start2 - padding, patch.start2); + var prefix = this.isLowSurrogate(text[patch.start2 - padding]) // Avoid splitting on non-character boundaries + ? text.substring(patch.start2 - padding - 1 , patch.start2) + : text.substring(patch.start2 - padding , patch.start2); + if (prefix) { patch.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL, prefix)); } + // Add the suffix. - var suffix = text.substring(patch.start2 + patch.length1, - patch.start2 + patch.length1 + padding); + var suffix = this.isHighSurrogate(text[patch.start2 + patch.length1 + padding]) // Avoid splitting on non-character boundaries + ? 
text.substring(patch.start2 + patch.length1, patch.start2 + patch.length1 + padding + 1) + : text.substring(patch.start2 + patch.length1, patch.start2 + patch.length1 + padding); + if (suffix) { patch.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL, suffix)); } @@ -2067,6 +2231,46 @@ diff_match_patch.prototype.patch_splitMax = function(patches) { } }; +diff_match_patch.prototype.diffs_joinSurrogatePairs = function(diffs) { + var lastEnd; + var overwrittenDiffsCounter = 0; + + for (var x = 0 ; x < diffs.length ; x++) { + var thisDiff = diffs[x]; + var thisTop = thisDiff[1][0]; + var thisEnd = thisDiff[1][thisDiff[1].length - 1]; + + if (0 === thisDiff[1].length) { + continue; + } + + // trap a trailing high-surrogate so we can + // distribute it to the successive edits + if (thisEnd && this.isHighSurrogate(thisEnd)) { + lastEnd = thisEnd; + thisDiff[1] = thisDiff[1].slice(0, -1); + } + + if (lastEnd && thisTop && this.isHighSurrogate(lastEnd) && this.isLowSurrogate(thisTop)) { + thisDiff[1] = lastEnd + thisDiff[1]; + } + + if (0 === thisDiff[1].length) { + continue; + } + + diffs[overwrittenDiffsCounter] = thisDiff; + overwrittenDiffsCounter ++; + } + + return diffs.splice(0, overwrittenDiffsCounter) +} + +diff_match_patch.prototype.patch_joinSurrogatePairs = function(patch) { + patch.diffs = this.diffs_joinSurrogatePairs(patch.diffs) + return patch +} + /** * Take a list of patches and return a textual representation. 
@@ -2076,7 +2280,7 @@ diff_match_patch.prototype.patch_splitMax = function(patches) { diff_match_patch.prototype.patch_toText = function(patches) { var text = []; for (var x = 0; x < patches.length; x++) { - text[x] = patches[x]; + text[x] = this.patch_joinSurrogatePairs(patches[x]); } return text.join(''); }; @@ -2129,7 +2333,7 @@ diff_match_patch.prototype.patch_fromText = function(textline) { while (textPointer < text.length) { var sign = text[textPointer].charAt(0); try { - var line = decodeURI(text[textPointer].substring(1)); + var line = this.decodeURI(text[textPointer].substring(1)); } catch (ex) { // Malformed URI sequence. throw new Error('Illegal escape in patch_fromText: ' + line); @@ -2219,18 +2423,8 @@ diff_match_patch.patch_obj.prototype.toString = function() { return text.join('').replace(/%20/g, ' '); }; -// CLOSURE:begin_strip -// Lines below here will not be included in the Closure-compatible library. - -// Export these global variables so that they survive Google's JS compiler. -// In a browser, 'this' will be 'window'. -// Users of node.js should 'require' the uncompressed version since Google's -// JS compiler may break the following exports for non-browser environments. 
-/** @suppress {globalThis} */ -this['diff_match_patch'] = diff_match_patch; -/** @suppress {globalThis} */ -this['DIFF_DELETE'] = DIFF_DELETE; -/** @suppress {globalThis} */ -this['DIFF_INSERT'] = DIFF_INSERT; -/** @suppress {globalThis} */ -this['DIFF_EQUAL'] = DIFF_EQUAL; +module.exports = diff_match_patch; +module.exports['diff_match_patch'] = diff_match_patch; +module.exports['DIFF_DELETE'] = DIFF_DELETE; +module.exports['DIFF_INSERT'] = DIFF_INSERT; +module.exports['DIFF_EQUAL'] = DIFF_EQUAL; \ No newline at end of file diff --git a/javascript/package.json b/javascript/package.json new file mode 100644 index 00000000..94d40f89 --- /dev/null +++ b/javascript/package.json @@ -0,0 +1,17 @@ +{ + "name": "diff-match-patch", + "version": "1.0.3", + "description": "diff-patch-match fork with fixed surrogate pair processing", + "main": "diff_match_patch_uncompressed.js", + "directories": { + "test": "tests" + }, + "scripts": { + }, + "repository": { + "type": "git", + "url": "https://github.com/feedyou-ai/diff-match-patch/tree/master/javascript" + }, + "author": "Google inc. (https://github.com/google), modified by https://github.com/dmsnell and https://github.com/feedyou-ai/", + "license": "Apache-2.0" +} diff --git a/javascript/tests/diff_match_patch_test.js b/javascript/tests/diff_match_patch_test.js index 109e56ad..99aaca13 100644 --- a/javascript/tests/diff_match_patch_test.js +++ b/javascript/tests/diff_match_patch_test.js @@ -492,6 +492,136 @@ function testDiffDelta() { // Convert delta string into a diff. 
assertEquivalent(diffs, dmp.diff_fromDelta(text1, delta)); + diffs = [[DIFF_EQUAL, '\ud83d\ude4b\ud83d'], [DIFF_INSERT, '\ude4c\ud83d'], [DIFF_EQUAL, '\ude4b']]; + try { + delta = dmp.diff_toDelta(diffs); + assertEquals('=2\t+%F0%9F%99%8C\t=2', delta); + } catch ( e ) { + assertEquals(false, true); + } + + (function(){ + const originalText = `U+1F17x π °οΈ π ±οΈ π ΎοΈ π ΏοΈ safhawifhkw + U+1F18x π + 0 1 2 3 4 5 6 7 8 9 A B C D E F + U+1F19x π π π π π π π π π π + U+1F20x π ποΈ sfss.,_||saavvvbbds + U+1F21x π + U+1F22x π― + U+1F23x π² π³ π΄ π΅ πΆ π·οΈ πΈ πΉ πΊ + U+1F25x π π + U+1F30x π π π π π π π π π π π π π π π π + U+1F31x π π π π π π π π π π π π π π π `; + + // applies some random edits to string and returns new, edited string + function applyRandomTextEdit(text) { + let textArr = [...text]; + let r = Math.random(); + if(r < 1/3) { // swap + let swapCount = Math.floor(Math.random()*5); + for(let i = 0; i < swapCount; i++) { + let swapPos1 = Math.floor(Math.random()*textArr.length); + let swapPos2 = Math.floor(Math.random()*textArr.length); + let char1 = textArr[swapPos1]; + let char2 = textArr[swapPos2]; + textArr[swapPos1] = char2; + textArr[swapPos2] = char1; + } + } else if(r < 2/3) { // remove + let removeCount = Math.floor(Math.random()*5); + for(let i = 0; i < removeCount; i++) { + let removePos = Math.floor(Math.random()*textArr.length); + textArr[removePos] = ""; + } + } else { // add + let addCount = Math.floor(Math.random()*5); + for(let i = 0; i < addCount; i++) { + let addPos = Math.floor(Math.random()*textArr.length); + let addFromPos = Math.floor(Math.random()*textArr.length); + textArr[addPos] = textArr[addPos] + textArr[addFromPos]; + } + } + return textArr.join(""); + } + + for(let i = 0; i < 1000; i++) { + newText = applyRandomTextEdit(originalText); + dmp.diff_toDelta(dmp.diff_main(originalText, newText)); + } + })(); + + // Unicode - splitting surrogates + try { + assertEquivalent( + dmp.diff_toDelta([[DIFF_INSERT,'\ud83c\udd71'], [DIFF_EQUAL, 
'\ud83c\udd70\ud83c\udd71']]), + dmp.diff_toDelta(dmp.diff_main('\ud83c\udd70\ud83c\udd71', '\ud83c\udd71\ud83c\udd70\ud83c\udd71')) + ); + } catch ( e ) { + assertEquals('Inserting similar surrogate pair at beginning', 'crashed'); + } + + try { + assertEquivalent( + dmp.diff_toDelta([[DIFF_EQUAL,'\ud83c\udd70'], [DIFF_INSERT, '\ud83c\udd70'], [DIFF_EQUAL, '\ud83c\udd71']]), + dmp.diff_toDelta(dmp.diff_main('\ud83c\udd70\ud83c\udd71', '\ud83c\udd70\ud83c\udd70\ud83c\udd71')) + ); + } catch ( e ) { + assertEquals('Inserting similar surrogate pair in the middle', 'crashed'); + } + + try { + assertEquivalent( + dmp.diff_toDelta([[DIFF_DELETE,'\ud83c\udd71'], [DIFF_EQUAL, '\ud83c\udd70\ud83c\udd71']]), + dmp.diff_toDelta(dmp.diff_main('\ud83c\udd71\ud83c\udd70\ud83c\udd71', '\ud83c\udd70\ud83c\udd71')) + ); + } catch ( e ) { + assertEquals('Deleting similar surrogate pair at the beginning', 'crashed'); + } + + try { + assertEquivalent( + dmp.diff_toDelta([[DIFF_EQUAL, '\ud83c\udd70'], [DIFF_DELETE,'\ud83c\udd72'], [DIFF_EQUAL, '\ud83c\udd71']]), + dmp.diff_toDelta(dmp.diff_main('\ud83c\udd70\ud83c\udd72\ud83c\udd71', '\ud83c\udd70\ud83c\udd71')) + ); + } catch ( e ) { + assertEquals('Deleting similar surrogate pair in the middle', 'crashed'); + } + + try { + assertEquivalent( + dmp.diff_toDelta([[DIFF_DELETE, '\ud83c\udd70'], [DIFF_INSERT, '\ud83c\udd71']]), + dmp.diff_toDelta([[DIFF_EQUAL, '\ud83c'], [DIFF_DELETE, '\udd70'], [DIFF_INSERT, '\udd71']]), + ); + } catch ( e ) { + assertEquals('Swap surrogate pair', 'crashed'); + } + + try { + assertEquivalent( + dmp.diff_toDelta([[DIFF_INSERT, '\ud83c\udd70'], [DIFF_DELETE, '\ud83c\udd71']]), + dmp.diff_toDelta([[DIFF_EQUAL, '\ud83c'], [DIFF_INSERT, '\udd70'], [DIFF_DELETE, '\udd71']]), + ); + } catch ( e ) { + assertEquals('Swap surrogate pair', 'crashed'); + } + + // Empty diff groups + assertEquivalent( + dmp.diff_toDelta([[DIFF_EQUAL, 'abcdef'], [DIFF_DELETE, ''], [DIFF_INSERT, 'ghijk']]), + 
dmp.diff_toDelta([[DIFF_EQUAL, 'abcdef'], [DIFF_INSERT, 'ghijk']]), + ); + + // Different versions of the library may have created deltas with + // half of a surrogate pair encoded as if it were valid UTF-8 + try { + assertEquivalent( + dmp.diff_toDelta(dmp.diff_fromDelta('\ud83c\udd70', '-2\t+%F0%9F%85%B1')), + dmp.diff_toDelta(dmp.diff_fromDelta('\ud83c\udd70', '=1\t-1\t+%ED%B5%B1')) + ); + } catch ( e ) { + assertEquals('Decode UTF8-encoded surrogate half', 'crashed'); + } + // Verify pool of unchanged characters. diffs = [[DIFF_INSERT, 'A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ']]; var text2 = dmp.diff_text2(diffs); diff --git a/objectivec/DiffMatchPatch.m b/objectivec/DiffMatchPatch.m index 580f2656..0c56201d 100755 --- a/objectivec/DiffMatchPatch.m +++ b/objectivec/DiffMatchPatch.m @@ -1299,7 +1299,28 @@ - (NSString *)diff_text2:(NSMutableArray *)diffs; - (NSString *)diff_toDelta:(NSMutableArray *)diffs; { NSMutableString *delta = [NSMutableString string]; + UniChar lastEnd = 0; for (Diff *aDiff in diffs) { + if (0 == [aDiff.text length]) { + continue; + } + + UniChar thisTop = [aDiff.text characterAtIndex:0]; + UniChar thisEnd = [aDiff.text characterAtIndex:([aDiff.text length]-1)]; + + if (0 != lastEnd && CFStringIsSurrogateHighCharacter(lastEnd) && CFStringIsSurrogateLowCharacter(thisTop)) { /* re-join with the high surrogate carried over from an earlier diff; must run before lastEnd is overwritten below */ + aDiff.text = [NSString stringWithFormat:@"%C%@", lastEnd, aDiff.text]; + } + + if (CFStringIsSurrogateHighCharacter(thisEnd)) { /* hold back a trailing high surrogate so a following diff that starts with a low surrogate can be prefixed with it */ + lastEnd = thisEnd; + aDiff.text = [aDiff.text substringToIndex:([aDiff.text length] - 1)]; + } + + if (0 == [aDiff.text length]) { + continue; + } + switch (aDiff.operation) { case DIFF_INSERT: [delta appendFormat:@"+%@\t", [[aDiff.text diff_stringByAddingPercentEscapesForEncodeUriCompatibility] @@ -1321,6 +1342,176 @@ - (NSString *)diff_toDelta:(NSMutableArray *)diffs; return delta; } +- (NSUInteger)diff_digit16:(unichar)c +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3;
+ case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; + default: + [NSException raise:@"Invalid percent-encoded string" format:@"%C is not a hex digit", c]; + } +} + +/** +* Decode a percent-encoded UTF-8 string into a string of UTF-16 code units +* This is more permissive than `stringByRemovingPercentEncoding` because +* that fails if the input represents invalid Unicode characters. However, different +* diff-match-patch libraries may encode surrogate halves as if they were valid +* Unicode code points. Therefore, instead of failing or corrupting the output, which +* `stringByRemovingPercentEncoding` does when it inserts "(null)" in these places +* we can decode it anyway and then once the string is reconstructed from the diffs +* we'll end up with valid Unicode again, after the surrogate halves are re-joined +*/ +- (NSString *)diff_decodeURIWithText:(NSString *)percentEncoded +{ + unichar *decoded = (unichar *)[[NSMutableData dataWithLength:(MAX((NSUInteger)1, [percentEncoded length]) * sizeof(unichar))] mutableBytes]; /* heap scratch buffer with at least one element: a variable-length stack array is undefined for empty input and can overflow the stack on large input; output never exceeds one unit per input unit */ + NSInteger input = 0; + NSInteger output = 0; + + @try { + while (input < [percentEncoded length]) { + unichar c = [percentEncoded characterAtIndex:input]; + + // not special, so just return it + if ('%' != c) { + decoded[output++] = c; + input += 1; + continue; + } + + NSUInteger byte1 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+1)]] << 4) + + [self diff_digit16:[percentEncoded characterAtIndex:(input+2)]]; + + // single-byte UTF-8 first byte has bitmask 0xxx xxxx + if ((byte1 & 0x80) == 0) { + decoded[output++] = byte1; + input += 3; + continue; + } + + // at least one continuation byte + if ('%' != [percentEncoded characterAtIndex:(input + 3)]) { + return nil; + } + + NSUInteger byte2 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+4)]] << 
4) + + [self diff_digit16:[percentEncoded characterAtIndex:(input+5)]]; + + // continuation bytes have bitmask 10xx xxxx + if ((byte2 & 0xC0) != 0x80) { + return nil; + } + + // continuation bytes thus only contribute six bits each + // these data bits are found with the bit mask xx11 1111 + byte2 = byte2 & 0x3F; + + // in two-byte sequences the first byte has bitmask 110x xxxx + if ((byte1 & 0xE0) == 0xC0) { + // byte1 ___x xxxx << 6 + // byte2 __yy yyyy + // value x xxxxyy yyyy -> 11 bits + decoded[output++] = ((byte1 & 0x1F) << 6) | byte2; + input += 6; + continue; + } + + // at least two continuation bytes + if ('%' != [percentEncoded characterAtIndex:(input + 6)]) { + return nil; + } + + NSUInteger byte3 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+7)]] << 4) + + [self diff_digit16:[percentEncoded characterAtIndex:(input+8)]]; + + if ((byte3 & 0xC0) != 0x80) { + return nil; + } + + byte3 = byte3 & 0x3F; + + // in three-byte sequences the first byte has bitmask 1110 xxxx + if ((byte1 & 0xF0) == 0xE0) { + // byte1 ____ xxxx << 12 + // byte2 __yy yyyy << 6 + // byte3 __zz zzzz + // value xxxxyy yyyyzz zzzz -> 16 bits + decoded[output++] = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3; + input += 9; + continue; + } + + // three continuation bytes + if ('%' != [percentEncoded characterAtIndex:(input + 9)]) { + return nil; + } + + NSUInteger byte4 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+10)]] << 4) + + [self diff_digit16:[percentEncoded characterAtIndex:(input+11)]]; + + if ((byte4 & 0xC0) != 0x80) { + return nil; + } + + byte4 = byte4 & 0x3F; + + // in four-byte sequences the first byte has bitmask 1111 0xxx + if ((byte1 & 0xF8) == 0xF0) { + // byte1 ____ _xxx << 18 + // byte2 __yy yyyy << 12 + // byte3 __zz zzzz << 6 + // byte4 __tt tttt + // value xxxyy yyyyzz zzzztt tttt -> 21 bits + NSUInteger codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4; + if (codePoint >= 0x010000 && codePoint <= 
0x10FFFF) { + codePoint -= 0x010000; + decoded[output++] = ((codePoint >> 10) & 0x3FF) | 0xD800; + decoded[output++] = 0xDC00 | (codePoint & 0x3FF); + input += 12; + continue; + } + } + + return nil; + } + } + @catch (NSException *e) { + return nil; + } + + // some objective-c versions of the library produced patches with + // (null) in the place where surrogates were split across diff + // boundaries. if we leave those in we'll be stuck with a + // high-surrogate (null) low-surrogate pattern that will break + // deeper in the library or consuming application. we'll "fix" + // these by dropping the (null) and re-joining the surrogate halves + NSString *result = [NSString stringWithCharacters:decoded length:output]; + NSRegularExpression *replacer = [NSRegularExpression + regularExpressionWithPattern:@"([\\x{D800}-\\x{DBFF}])\\(null\\)([\\x{DC00}-\\x{DFFF}])" + options:0 + error:nil]; + + return [replacer + stringByReplacingMatchesInString:result + options:0 + range:NSMakeRange(0, [result length]) + withTemplate:@"$1$2"]; +} + /** * Given the original text1, and an encoded NSString which describes the * operations required to transform text1 into text2, compute the full diff. 
@@ -1348,7 +1539,7 @@ - (NSMutableArray *)diff_fromDeltaWithText:(NSString *)text1 NSString *param = [token substringFromIndex:1]; switch ([token characterAtIndex:0]) { case '+': - param = [param diff_stringByReplacingPercentEscapesForEncodeUriCompatibility]; + param = [self diff_decodeURIWithText:param]; if (param == nil) { if (error != NULL) { errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: diff --git a/objectivec/Tests/DiffMatchPatchTest.m b/objectivec/Tests/DiffMatchPatchTest.m index 9697b04c..7e31508b 100755 --- a/objectivec/Tests/DiffMatchPatchTest.m +++ b/objectivec/Tests/DiffMatchPatchTest.m @@ -752,6 +752,68 @@ - (void)test_diff_deltaTest { XCTAssertEqualObjects(diffs, [dmp diff_fromDeltaWithText:text1 andDelta:delta error:NULL], @"diff_fromDelta: Unicode 2."); + diffs = [dmp diff_mainOfOldString:@"βΊοΈππΏ" andNewString:@"βΊοΈπππΏ"]; + delta = [dmp diff_toDelta:diffs]; + + XCTAssertEqualObjects(delta, @"=2\t+%F0%9F%98%83\t=4", @"Delta should match the expected string"); + + diffs = [dmp diff_mainOfOldString:@"βΊοΈππΏ" andNewString:@"βΊοΈπππΏ"]; + NSArray *patches = [dmp patch_makeFromDiffs:diffs]; + NSArray *patchResult = [dmp patch_apply:patches toString:@"βΊοΈππΏ"]; + + expectedString = [patchResult firstObject]; + XCTAssertEqualObjects(@"βΊοΈπππΏ", expectedString, @"Output String should match the Edited one!"); + + // Unicode - splitting surrogates + + // Inserting similar surrogate pair at beginning + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_INSERT andText:@"π ±"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"π °π ±"], + nil]; + XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"π °π ±" andNewString:@"π ±π °π ±"]]); + + // Inserting similar surrogate pair in the middle + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"π °"], + [Diff diffWithOperation:DIFF_INSERT andText:@"π °"], + [Diff diffWithOperation:DIFF_EQUAL 
andText:@"π ±"], + nil]; + XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"π °π ±" andNewString:@"π °π °π ±"]]); + + // Deleting similar surrogate pair at the beginning + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"π ±"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"π °π ±"], + nil]; + XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"π ±π °π ±" andNewString:@"π °π ±"]]); + + // Deleting similar surrogate pair in the middle + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"π °"], + [Diff diffWithOperation:DIFF_DELETE andText:@"π ²"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"π ±"], + nil]; + XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"π °π ²π ±" andNewString:@"π °π ±"]]); + + // Swapping surrogate pairs + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"π °"], + [Diff diffWithOperation:DIFF_INSERT andText:@"π ±"], + nil]; + XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"π °" andNewString:@"π ±"]]); + + // Swapping surrogate pairs + XCTAssertEqualObjects( [dmp diff_toDelta:([NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"π °"], + [Diff diffWithOperation:DIFF_INSERT andText:@"π ±"], + nil])], + [dmp diff_toDelta:([NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:[NSString stringWithFormat:@"%C", 0xd83c]], + [Diff diffWithOperation:DIFF_DELETE andText:[NSString stringWithFormat:@"%C", 0xdd70]], + [Diff diffWithOperation:DIFF_INSERT andText:[NSString stringWithFormat:@"%C", 0xdd71]], + nil])]); + // Verify pool of unchanged characters. diffs = [NSMutableArray arrayWithObject: [Diff diffWithOperation:DIFF_INSERT andText:@"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? 
: @ & = + $ , # "]]; @@ -781,6 +843,11 @@ - (void)test_diff_deltaTest { expectedResult = [dmp diff_fromDeltaWithText:@"" andDelta:delta error:NULL]; XCTAssertEqualObjects(diffs, expectedResult, @"diff_fromDelta: 160kb string. Convert delta string into a diff."); + // Different versions of the library may have created deltas with + // half of a surrogate pair encoded as if it were valid UTF-8 + XCTAssertEqualObjects([dmp diff_toDelta:([dmp diff_fromDeltaWithText:@"π °" andDelta:@"-2\t+%F0%9F%85%B1" error:NULL])], + [dmp diff_toDelta:([dmp diff_fromDeltaWithText:@"π °" andDelta:@"=1\t-1\t+%ED%B5%B1" error:NULL])]); + [dmp release]; } diff --git a/python2/diff_match_patch.py b/python2/diff_match_patch.py index 806fe1e6..8b26125c 100644 --- a/python2/diff_match_patch.py +++ b/python2/diff_match_patch.py @@ -28,6 +28,7 @@ __author__ = 'fraser@google.com (Neil Fraser)' import re +import struct import sys import time import urllib @@ -1135,6 +1136,14 @@ def diff_levenshtein(self, diffs): levenshtein += max(insertions, deletions) return levenshtein + @classmethod + def is_high_surrogate(cls, c): + return 0xd800 <= struct.unpack('>H', c)[0] <= 0xdbff + + @classmethod + def is_low_surrogate(cls, c): + return 0xdc00 <= struct.unpack('>H', c)[0] <= 0xdfff + def diff_toDelta(self, diffs): """Crush the diff into an encoded string which describes the operations required to transform text1 into text2. @@ -1148,15 +1157,32 @@ def diff_toDelta(self, diffs): Delta text. """ text = [] + last_end = None for (op, data) in diffs: + if 0 == len(data): + continue + + encoded = data.encode('utf-16be') + this_top = encoded[0:2] + this_end = encoded[-2:] + + if self.is_high_surrogate(this_end): + last_end = this_end + encoded = encoded[0:-2] + + if last_end and self.is_high_surrogate(last_end) and self.is_low_surrogate(this_top): + encoded = last_end + encoded + + if 0 == len(encoded): + continue + if op == self.DIFF_INSERT: # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
- data = data.encode("utf-8") - text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# ")) + text.append("+" + urllib.quote(encoded.decode('utf-16be').encode('utf-8'), "!~*'();/?:@&=+$,# ")) elif op == self.DIFF_DELETE: - text.append("-%d" % len(data)) + text.append("-%d" % (len(encoded) // 2)) elif op == self.DIFF_EQUAL: - text.append("=%d" % len(data)) + text.append("=%d" % (len(encoded) // 2)) return "\t".join(text) def diff_fromDelta(self, text1, delta): diff --git a/python2/tests/diff_match_patch_test.py b/python2/tests/diff_match_patch_test.py index 661a6b67..94f5fd35 100644 --- a/python2/tests/diff_match_patch_test.py +++ b/python2/tests/diff_match_patch_test.py @@ -441,6 +441,86 @@ def testDiffDelta(self): # Convert delta string into a diff. self.assertEquals(diffs, self.dmp.diff_fromDelta(text1, delta)) + diffs = [(self.dmp.DIFF_EQUAL, u"\ud83d\ude4b\ud83d"), (self.dmp.DIFF_INSERT, u"\ude4c\ud83d"), (self.dmp.DIFF_EQUAL, u"\ude4b")] + delta = self.dmp.diff_toDelta(diffs) + self.assertEquals("=2\t+%F0%9F%99%8C\t=2", delta) + + # Unicode: split surrogates + # Inserting similar surrogate pair at beginning + self.assertEquals( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_INSERT, u'\U0001F171'), + (self.dmp.DIFF_EQUAL, u'\U0001F170\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + u'\U0001F170\U0001F171', + u'\U0001F171\U0001F170\U0001F171' + )) + ) + + # Inserting similar surrogate pair in the middle + self.assertEquals( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_EQUAL, u'\U0001F170'), + (self.dmp.DIFF_INSERT, u'\U0001F172'), + (self.dmp.DIFF_EQUAL, u'\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + u'\U0001F170\U0001F171', + u'\U0001F170\U0001F172\U0001F171' + )) + ) + + # Deleting similar surrogate pair at the beginning + self.assertEquals( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_DELETE, u'\U0001F171'), + (self.dmp.DIFF_EQUAL, u'\U0001F170\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( +
u'\U0001F171\U0001F170\U0001F171', + u'\U0001F170\U0001F171' + )) + ) + + # Deleting similar surrogate pair in the middle + self.assertEquals( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_EQUAL, u'\U0001F170'), + (self.dmp.DIFF_DELETE, u'\U0001F172'), + (self.dmp.DIFF_EQUAL, u'\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + u'\U0001F170\U0001F172\U0001F171', + u'\U0001F170\U0001F171' + )) + ) + + # Swap surrogate pair + self.assertEquals( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_DELETE, u'\U0001F170'), + (self.dmp.DIFF_INSERT, u'\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + u'\U0001F170', + u'\U0001F171' + )) + ) + + # Swap surrogate pair, force the invalid diff groups + self.assertEquals( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_INSERT, u'\U0001F170'), + (self.dmp.DIFF_DELETE, u'\U0001F171') + ]), + self.dmp.diff_toDelta([ + (self.dmp.DIFF_EQUAL, u'\ud83c'), + (self.dmp.DIFF_INSERT, u'\udd70'), + (self.dmp.DIFF_DELETE, u'\udd71') + ]) + ) + # Verify pool of unchanged characters. diffs = [(self.dmp.DIFF_INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")] text2 = self.dmp.diff_text2(diffs) diff --git a/python3/diff_match_patch.py b/python3/diff_match_patch.py index cc7f5907..3bf825c5 100644 --- a/python3/diff_match_patch.py +++ b/python3/diff_match_patch.py @@ -26,6 +26,7 @@ __author__ = 'fraser@google.com (Neil Fraser)' import re +import struct import sys import time import urllib.parse @@ -1147,14 +1148,17 @@ def diff_toDelta(self, diffs): """ text = [] for (op, data) in diffs: + if 0 == len(data): + continue + if op == self.DIFF_INSERT: # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
data = data.encode("utf-8") text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# ")) elif op == self.DIFF_DELETE: - text.append("-%d" % len(data)) + text.append("-%d" % (len(data.encode('utf-16-be')) // 2)) elif op == self.DIFF_EQUAL: - text.append("=%d" % len(data)) + text.append("=%d" % (len(data.encode('utf-16-be')) // 2)) return "\t".join(text) def diff_fromDelta(self, text1, delta): @@ -1172,7 +1176,8 @@ def diff_fromDelta(self, text1, delta): ValueError: If invalid input. """ diffs = [] - pointer = 0 # Cursor in text1 + as_utf16 = text1.encode('utf-16-be') + pointer = 0 # Cursor in as_utf16 tokens = delta.split("\t") for token in tokens: if token == "": @@ -1191,8 +1196,8 @@ def diff_fromDelta(self, text1, delta): raise ValueError("Invalid number in diff_fromDelta: " + param) if n < 0: raise ValueError("Negative number in diff_fromDelta: " + param) - text = text1[pointer : pointer + n] - pointer += n + text = as_utf16[pointer : pointer + n * 2].decode('utf-16-be') + pointer += n * 2 if token[0] == "=": diffs.append((self.DIFF_EQUAL, text)) else: @@ -1201,10 +1206,10 @@ def diff_fromDelta(self, text1, delta): # Anything else is an error. raise ValueError("Invalid diff operation in diff_fromDelta: " + token[0]) - if pointer != len(text1): + if pointer != len(as_utf16): raise ValueError( "Delta length (%d) does not equal source text length (%d)." % - (pointer, len(text1))) + (pointer, len(as_utf16))) return diffs # MATCH FUNCTIONS diff --git a/python3/tests/diff_match_patch_test.py b/python3/tests/diff_match_patch_test.py index 3659d3e7..4ff16abd 100644 --- a/python3/tests/diff_match_patch_test.py +++ b/python3/tests/diff_match_patch_test.py @@ -18,6 +18,7 @@ """ import imp +import json import os import sys import time @@ -444,6 +445,12 @@ def testDiffDelta(self): # Convert delta string into a diff. 
self.assertEqual(diffs, self.dmp.diff_fromDelta(text1, delta)) + diffs = self.dmp.diff_main("\U0001F64B\U0001F64B", "\U0001F64B\U0001F64C\U0001F64B") + delta = self.dmp.diff_toDelta(diffs) + self.assertEqual("=2\t+%F0%9F%99%8C\t=2", delta) + + self.assertEqual(diffs, self.dmp.diff_fromDelta("\U0001F64B\U0001F64B", "=2\t+%F0%9F%99%8C\t=2")) + # Verify pool of unchanged characters. diffs = [(self.dmp.DIFF_INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")] text2 = self.dmp.diff_text2(diffs) @@ -455,6 +462,69 @@ def testDiffDelta(self): # Convert delta string into a diff. self.assertEqual(diffs, self.dmp.diff_fromDelta("", delta)) + # Unicode: split surrogates + self.assertEqual( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_INSERT, '\U0001F171'), + (self.dmp.DIFF_EQUAL, '\U0001F170\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + '\U0001F170\U0001F171', + '\U0001F171\U0001F170\U0001F171' + )), + 'Inserting similar surrogate pair at beginning' + ) + + self.assertEqual( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_EQUAL, '\U0001F170'), + (self.dmp.DIFF_INSERT, '\U0001F172'), + (self.dmp.DIFF_EQUAL, '\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + '\U0001F170\U0001F171', + '\U0001F170\U0001F172\U0001F171' + )), + 'Inserting similar surrogate pair in the middle' + ) + + self.assertEqual( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_DELETE, '\U0001F171'), + (self.dmp.DIFF_EQUAL, '\U0001F170\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + '\U0001F171\U0001F170\U0001F171', + '\U0001F170\U0001F171' + )), + 'Deleting similar surogate pair at the beginning' + ) + + self.assertEqual( + self.dmp.diff_toDelta([ + (self.dmp.DIFF_EQUAL, '\U0001F170'), + (self.dmp.DIFF_DELETE, '\U0001F172'), + (self.dmp.DIFF_EQUAL, '\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + '\U0001F170\U0001F172\U0001F171', + '\U0001F170\U0001F171' + )), + 'Deleting similar surogate pair in the middle' + ) + + self.assertEqual( + 
self.dmp.diff_toDelta([ + (self.dmp.DIFF_DELETE, '\U0001F170'), + (self.dmp.DIFF_INSERT, '\U0001F171') + ]), + self.dmp.diff_toDelta(self.dmp.diff_main( + '\U0001F170', + '\U0001F171' + )), + 'Swap surrogate pair' + ) + # 160 kb string. a = "abcdefghij" for i in range(14):