diff --git a/bower.json b/bower.json new file mode 100644 index 0000000..dd4da06 --- /dev/null +++ b/bower.json @@ -0,0 +1,35 @@ +{ + "name": "htmldiff", + "version": "0.0.4", + "description": "HTML Diffing in JavaScript (CoffeeScript)", + "main": "dist/htmldiff.js", + "license": "MIT (Expat)", + "authors": [ + "The Network Inc.", + "https://github.com/brendanjerwin", + "https://github.com/tnwinc", + "https://github.com/myobie/htmldiff" + ], + "repository": { + "type": "git", + "url": "git://github.com/benjamine/jsondiffpatch.git" + }, + "keywords": [ + "html", + "diff", + "split" + ], + "ignore": [ + "gulpfile.js" + ], + "homepage": "https://github.com/enkodellc/htmldiff", + "_release": "0.0.4", + "_resolution": { + "type": "version", + "tag": "v0.0.4", + "commit": "26d252d94bdd7dad22df8ac273990266aa677cd6" + }, + "_source": "https://github.com/enkodellc/htmldiff.js.git", + "_target": "0.0.4", + "_originalSource": "htmldiff" +} diff --git a/dist/htmldiff.js b/dist/htmldiff.js new file mode 100644 index 0000000..ac30265 --- /dev/null +++ b/dist/htmldiff.js @@ -0,0 +1,476 @@ +/* + * htmldiff.js is a library that compares HTML content. It creates a diff between two + * HTML documents by combining the two documents and wrapping the differences with + * <ins> and <del> tags. Here is a high-level overview of how the diff works. + * + * 1. Tokenize the before and after HTML with html_to_tokens. + * 2. Generate a list of operations that convert the before list of tokens to the after + * list of tokens with calculate_operations, which does the following: + * a. Find all the matching blocks of tokens between the before and after lists of + * tokens with find_matching_blocks. This is done by finding the single longest + * matching block with find_match, then recursively finding the next longest + * matching block that precede and follow the longest matching block with + * recursively_find_matching_blocks. + * b. Determine insertions, deletions, and replacements from the matching blocks. 
+ * This is done in calculate_operations. + * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where + * appropriate with render_operations. + * + * Example usage: + * + * htmldiff = require 'htmldiff.js' + * + * htmldiff '<p>this is some text</p>', '<p>this is some more text</p>' + * == '<p>this is some <ins>more </ins>text</p>' + * + * htmldiff '<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class' + * == '<p>this is some <ins class="diff-class">more </ins>text</p>' + * + * NOTE(review): the markup in these examples was restored by hand after being lost from this copy; the second result is presumed. The diff function defined below declares only (before, after), so a third argument has no effect in this build - confirm against upstream. 
+ */ +var Match, calculate_operations, consecutive_where, create_index, diff, diff_dual_pane, ends_in_end_script_tag, find_match, find_matching_blocks, html_to_tokens, is_end_of_tag, is_script_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, render_operations_dual_pane, return_dual_pane, wrap; +/* Returns true when char is exactly the closing angle bracket. */ +is_end_of_tag = function(char) { + return char === '>'; +}; +/* Returns true when char is exactly the opening angle bracket. */ +is_start_of_tag = function(char) { + return char === '<'; +}; +/* Returns true when the string consists solely of one or more whitespace characters. */ +is_whitespace = function(char) { + return /^\s+$/.test(char); +}; +/* Returns true when token, ignoring surrounding whitespace, is one angle-bracketed sequence with at least one character between the brackets. */ +is_tag = function(token) { + return /^\s*<[^>]+>\s*$/.test(token); +}; +/* Logical negation of is_tag. */ +isnt_tag = function(token) { + return !is_tag(token); +}; +/* As written here, returns true only for the empty string. NOTE(review): the name and its use in html_to_tokens suggest the compared literal was an opening script tag prefix that was lost from this copy - confirm against the original file. Also note that ends_in_end_script_tag, which html_to_tokens calls, is declared in the var list above but has no definition visible in this chunk. */ +is_script_tag = function(token) { + return token === ''; +}; +/* Match describes a run of length tokens common to both lists, starting at start_in_before and start_in_after. end_in_before and end_in_after are inclusive indexes (start + length - 1), so a zero-length match has end equal to start - 1. */ +Match = (function() { + function Match(start_in_before, start_in_after, length) { + this.start_in_before = start_in_before; + this.start_in_after = start_in_after; + this.length = length; + this.end_in_before = (this.start_in_before + this.length) - 1; + this.end_in_after = (this.start_in_after + this.length) - 1; + } + + return Match; + +})(); +/* Packs the two given values into an object with before and after keys. */ +return_dual_pane = function(before, after) { + return { + before: before, + after: after + }; +}; +/* Splits an HTML string into an array of tokens using a character-by-character state machine. Modes: 'char' (word being accumulated), 'whitespace' (whitespace run), 'tag' (from an opening angle bracket to the closing one) and 'script' (accumulates until ends_in_end_script_tag accepts the word). Returns the token array; throws Error on an unknown mode. */ +html_to_tokens = function(html) { + var char, current_word, mode, words, _i, _len; + mode = 'char'; + current_word = ''; + words = []; + for (_i = 0, _len = html.length; _i < _len; _i++) { + char = html[_i]; + switch (mode) { + case 'script': + if (is_end_of_tag(char)) { + current_word += '>'; + if (ends_in_end_script_tag(current_word)) { + words.push(current_word); + current_word = ''; + /* char is the closing angle bracket here, so is_whitespace(char) is false and mode always becomes 'char' */ if (is_whitespace(char)) { + mode = 'whitespace'; + } else { + mode = 'char'; + } + } + } else { + current_word += char; + } + break; + case 'tag': + if (is_script_tag(current_word)) { + mode = 'script'; + current_word += char; + } else if (is_end_of_tag(char)) { + current_word += '>'; + words.push(current_word); + current_word = ''; + /* same as in the script case: char is the closing angle bracket, so only the else branch can run */ if (is_whitespace(char)) { + mode = 'whitespace'; + } 
else { + mode = 'char'; + } + } else { + current_word += char; + } + break; + case 'char': + if (is_start_of_tag(char)) { + if (current_word) { + words.push(current_word); + } + current_word = '<'; + mode = 'tag'; + } else if (/\s/.test(char)) { + if (current_word) { + words.push(current_word); + } + current_word = char; + mode = 'whitespace'; + } else if (/[\w\#@]+/i.test(char)) { + current_word += char; + } else { + if (current_word) { + words.push(current_word); + } + current_word = char; + } + break; + case 'whitespace': + if (is_start_of_tag(char)) { + if (current_word) { + words.push(current_word); + } + current_word = '<'; + mode = 'tag'; + } else if (is_whitespace(char)) { + current_word += char; + } else { + if (current_word) { + words.push(current_word); + } + current_word = char; + mode = 'char'; + } + break; + default: + throw new Error('Unknown mode ' + mode); + } + } + if (current_word) { + words.push(current_word); + } + return words; +}; +/* Finds the longest run of consecutive equal tokens between before_tokens in [start_in_before, end_in_before) and after_tokens in [start_in_after, end_in_after). For each before token it walks that token's positions from the index and records, per after position, the length of the run ending there (previous run length + 1). The early break on end_in_after relies on the positions being in ascending order, which is how create_index builds them. Only a strictly longer run replaces the current best. Returns a Match, or undefined when no token matches. */ +find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) { + var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, _i, _j, _len; + best_match_in_before = start_in_before; + best_match_in_after = start_in_after; + best_match_length = 0; + match_length_at = {}; + for (index_in_before = _i = start_in_before; start_in_before <= end_in_before ? _i < end_in_before : _i > end_in_before; index_in_before = start_in_before <= end_in_before ? 
++_i : --_i) { + new_match_length_at = {}; + looking_for = before_tokens[index_in_before]; + locations_in_after = index_of_before_locations_in_after_tokens[looking_for]; + for (_j = 0, _len = locations_in_after.length; _j < _len; _j++) { + index_in_after = locations_in_after[_j]; + if (index_in_after < start_in_after) { + continue; + } + if (index_in_after >= end_in_after) { + break; + } + if (match_length_at[index_in_after - 1] == null) { + match_length_at[index_in_after - 1] = 0; + } + new_match_length = match_length_at[index_in_after - 1] + 1; + new_match_length_at[index_in_after] = new_match_length; + if (new_match_length > best_match_length) { + best_match_in_before = index_in_before - new_match_length + 1; + best_match_in_after = index_in_after - new_match_length + 1; + best_match_length = new_match_length; + } + } + match_length_at = new_match_length_at; + } + if (best_match_length !== 0) { + match = new Match(best_match_in_before, best_match_in_after, best_match_length); + } + return match; +}; +/* Finds the best match inside the given ranges with find_match, recurses into the range before it (only when both lists have tokens ahead of the match), pushes the match, then recurses into the range after it. Matches are therefore appended to matching_blocks in positional order. Returns matching_blocks. */ +recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) { + var match; + match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after); + if (match != null) { + if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) { + recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks); + } + matching_blocks.push(match); + if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) { + recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, 
end_in_after, matching_blocks); + } + } + return matching_blocks; +}; +/* Builds an object that maps every token of p.find_these to the array of positions, in ascending order, at which that token occurs in p.in_these; a token that never occurs maps to an empty array. A token that appears several times in p.find_these is simply recomputed. Throws Error when p.find_these or p.in_these is null or undefined. */ +create_index = function(p) { + var idx, index, token, _i, _len, _ref; + if (p.find_these == null) { + throw new Error('params must have find_these key'); + } + if (p.in_these == null) { + throw new Error('params must have in_these key'); + } + index = {}; + _ref = p.find_these; + for (_i = 0, _len = _ref.length; _i < _len; _i++) { + token = _ref[_i]; + index[token] = []; + idx = p.in_these.indexOf(token); + while (idx !== -1) { + index[token].push(idx); + idx = p.in_these.indexOf(token, idx + 1); + } + } + return index; +}; +/* Indexes after_tokens by the tokens of before_tokens, then collects the matching blocks over the full length of both lists. Returns the array of Match objects. */ +find_matching_blocks = function(before_tokens, after_tokens) { + var index_of_before_locations_in_after_tokens, matching_blocks; + matching_blocks = []; + index_of_before_locations_in_after_tokens = create_index({ + find_these: before_tokens, + in_these: after_tokens + }); + return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks); +}; +/* Turns the matching blocks into a list of operation objects with the keys action, start_in_before, end_in_before, start_in_after and end_in_after, where action is 'equal', 'insert', 'delete' or 'replace'. A zero-length Match placed at the end of both lists acts as a sentinel so that any trailing difference is emitted. A second pass folds a replace, or an equal consisting of a single whitespace token, into a directly preceding replace by extending that replace in place. Throws Error when either token list is null or undefined. */ +calculate_operations = function(before_tokens, after_tokens) { + var action_map, action_up_to_match_positions, index, is_single_whitespace, last_op, match, match_starts_at_current_position_in_after, match_starts_at_current_position_in_before, matches, op, operations, position_in_after, position_in_before, post_processed, _i, _j, _len, _len1; + if (before_tokens == null) { + throw new Error('before_tokens?'); + } + if (after_tokens == null) { + throw new Error('after_tokens?'); + } + position_in_before = position_in_after = 0; + operations = []; + action_map = { + 'false,false': 'replace', + 'true,false': 'insert', + 'false,true': 'delete', + 'true,true': 'none' + }; + matches = find_matching_blocks(before_tokens, after_tokens); + matches.push(new Match(before_tokens.length, after_tokens.length, 0)); + for (index = _i = 0, _len = matches.length; _i < _len; index = ++_i) { + match = matches[index]; + 
match_starts_at_current_position_in_before = position_in_before === match.start_in_before; + match_starts_at_current_position_in_after = position_in_after === match.start_in_after; + action_up_to_match_positions = action_map[[match_starts_at_current_position_in_before, match_starts_at_current_position_in_after].toString()]; + if (action_up_to_match_positions !== 'none') { + operations.push({ + action: action_up_to_match_positions, + start_in_before: position_in_before, + end_in_before: (action_up_to_match_positions !== 'insert' ? match.start_in_before - 1 : void 0), + start_in_after: position_in_after, + end_in_after: (action_up_to_match_positions !== 'delete' ? match.start_in_after - 1 : void 0) + }); + } + if (match.length !== 0) { + operations.push({ + action: 'equal', + start_in_before: match.start_in_before, + end_in_before: match.end_in_before, + start_in_after: match.start_in_after, + end_in_after: match.end_in_after + }); + } + position_in_before = match.end_in_before + 1; + position_in_after = match.end_in_after + 1; + } + post_processed = []; + last_op = { + action: 'none' + }; + is_single_whitespace = function(op) { + if (op.action !== 'equal') { + return false; + } + if (op.end_in_before - op.start_in_before !== 0) { + return false; + } + return /^\s$/.test(before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9)); + }; + for (_j = 0, _len1 = operations.length; _j < _len1; _j++) { + op = operations[_j]; + if (((is_single_whitespace(op)) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) { + last_op.end_in_before = op.end_in_before; + last_op.end_in_after = op.end_in_after; + } else { + post_processed.push(op); + last_op = op; + } + } + return post_processed; +}; +/* Returns the leading tokens of content, counted from index start, up to and including the last token for which predicate returned true before the first token for which it returned false. Returns an empty array when no token qualified. */ +consecutive_where = function(start, content, predicate) { + var answer, index, last_matching_index, token, _i, _len; + content = content.slice(start, +content.length + 1 || 9e9); + last_matching_index = void 0; + for (index = _i = 0, 
_len = content.length; _i < _len; index = ++_i) { + token = content[index]; + answer = predicate(token); + if (answer === true) { + last_matching_index = index; + } + if (answer === false) { + break; + } + } + if (last_matching_index != null) { + return content.slice(0, +last_matching_index + 1 || 9e9); + } + return []; +}; +/* Joins content into one string, alternating between runs of non-tag tokens, which are prefixed with an opening tag built from the tag argument, and runs of tag tokens, which are emitted unwrapped. NOTE(review): the literal appended after each non-tag run reads as an empty string here; it looks like the closing tag was lost from this copy - confirm against the original file. */ +wrap = function(tag, content) { + var length, non_tags, position, rendering, tags; + rendering = ''; + position = 0; + length = content.length; + while (true) { + if (position >= length) { + break; + } + non_tags = consecutive_where(position, content, isnt_tag); + position += non_tags.length; + if (non_tags.length !== 0) { + rendering += '<' + tag + '>' + (non_tags.join('')) + ''; + } + if (position >= length) { + break; + } + tags = consecutive_where(position, content, is_tag); + position += tags.length; + rendering += tags.join(''); + } + return rendering; +}; +/* Renderers keyed by operation action. equal returns the joined before tokens of the operation range; insert passes the after tokens of the range to wrap with 'ins'; delete passes the before tokens of the range to wrap with 'del'. */ +op_map = { + equal: function(op, before_tokens, after_tokens) { + return before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9).join(''); + }, + insert: function(op, before_tokens, after_tokens) { + var val; + val = after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9); + return wrap('ins', val); + }, + delete: function(op, before_tokens, after_tokens) { + var val; + val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9); + return wrap('del', val); + } +}; +/* replace returns a two-element array, the delete rendering followed by the insert rendering, so that callers can place the halves separately. */ +op_map.replace = function(op, before_tokens, after_tokens) { + return [op_map['delete'](op, before_tokens, after_tokens), op_map.insert(op, before_tokens, after_tokens)]; +}; +/* Renders the operations into a single string; for a replace the delete half is emitted before the insert half. */ +render_operations = function(before_tokens, after_tokens, operations) { + var op, rendering, result, _i, _len; + rendering = ''; + for (_i = 0, _len = operations.length; _i < _len; _i++) { + op = operations[_i]; + result = op_map[op.action](op, before_tokens, after_tokens); + if (op.action === 'replace') { + rendering += result[0] + result[1]; + } else { + rendering += result; + } + } + return rendering; 
+}; +/* Renders the operations into two strings: equal output goes to both, insert only to the after string, delete only to the before string, and replace contributes its delete half to before and its insert half to after. Returns the pair via return_dual_pane. */ +render_operations_dual_pane = function(before_tokens, after_tokens, operations) { + var after_render, before_render, next_block, op, _i, _len; + before_render = ''; + after_render = ''; + for (_i = 0, _len = operations.length; _i < _len; _i++) { + op = operations[_i]; + next_block = op_map[op.action](op, before_tokens, after_tokens); + switch (op.action) { + case 'equal': + before_render += next_block; + after_render += next_block; + break; + case 'insert': + after_render += next_block; + break; + case 'delete': + before_render += next_block; + break; + case 'replace': + before_render += next_block[0]; + after_render += next_block[1]; + } + } + return return_dual_pane(before_render, after_render); +}; +/* Dual-pane entry point: returns the inputs untouched as a before/after pair when they are strictly equal; otherwise tokenizes both, computes the operations and renders the two panes. */ +diff_dual_pane = function(before, after) { + var ops; + if (before === after) { + return return_dual_pane(before, after); + } + before = html_to_tokens(before); + after = html_to_tokens(after); + ops = calculate_operations(before, after); + return render_operations_dual_pane(before, after, ops); +}; +/* Single-string entry point: returns before untouched when the inputs are strictly equal; otherwise tokenizes both, computes the operations and returns the combined rendering. */ +diff = function(before, after) { + var ops; + if (before === after) { + return before; + } + before = html_to_tokens(before); + after = html_to_tokens(after); + ops = calculate_operations(before, after); + return render_operations(before, after, ops); +}; +/* The internal helpers are attached as properties of diff (and of find_matching_blocks) so they are reachable through the single exported function. */ +diff.html_to_tokens = html_to_tokens; + +diff.find_matching_blocks = find_matching_blocks; + +find_matching_blocks.find_match = find_match; + +find_matching_blocks.create_index = create_index; + +diff.calculate_operations = calculate_operations; + +diff.render_operations = render_operations; + +diff.render_operations_dual_pane = render_operations_dual_pane; + +diff.diff_dual_pane = diff_dual_pane; +/* Export: through define() when a define function exists, else through module.exports, else as this.htmldiff. */ +if (typeof define === 'function') { + define([], function() { + return diff; + }); +} else if (typeof module !== 'undefined' && module !== null) { + module.exports = diff; +} else { + this.htmldiff = diff; +} diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee index dd5c658..c230cac 100644 --- 
a/src/htmldiff.coffee +++ b/src/htmldiff.coffee @@ -3,12 +3,19 @@ is_start_of_tag = (char)-> char is '<' is_whitespace = (char)-> /^\s+$/.test char is_tag = (token)-> /^\s*<[^>]+>\s*$/.test token isnt_tag = (token)-> not is_tag token +is_script_tag = (token) -> token is ' + token_end = token.substr token.length - 9 + token_end is '' class Match constructor: (@start_in_before, @start_in_after, @length)-> @end_in_before = (@start_in_before + @length) - 1 @end_in_after = (@start_in_after + @length) - 1 +return_dual_pane = (before, after)-> + { before: before, after: after } + html_to_tokens = (html)-> mode = 'char' current_word = '' @@ -16,8 +23,23 @@ html_to_tokens = (html)-> for char in html switch mode - when 'tag' + when 'script' if is_end_of_tag char + current_word += '>' + if ends_in_end_script_tag current_word + words.push current_word + current_word = '' + if is_whitespace char + mode = 'whitespace' + else + mode = 'char' + else + current_word += char + when 'tag' + if is_script_tag current_word + mode = 'script' + current_word += char + else if is_end_of_tag char current_word += '>' words.push current_word current_word = '' @@ -262,16 +284,47 @@ op_map = wrap 'del', val op_map.replace = (op, before_tokens, after_tokens)-> - (op_map.delete op, before_tokens, after_tokens) + - (op_map.insert op, before_tokens, after_tokens) + [(op_map.delete op, before_tokens, after_tokens), + (op_map.insert op, before_tokens, after_tokens)] render_operations = (before_tokens, after_tokens, operations)-> rendering = '' for op in operations - rendering += op_map[op.action] op, before_tokens, after_tokens + result = op_map[op.action] op, before_tokens, after_tokens + if op.action is 'replace' + rendering += result[0] + result[1] + else + rendering += result return rendering +render_operations_dual_pane = (before_tokens, after_tokens, operations)-> + before_render = '' + after_render = '' + for op in operations + next_block = op_map[op.action] op, before_tokens, after_tokens + 
switch op.action + when "equal" + before_render += next_block + after_render += next_block + when "insert" then after_render += next_block + when "delete" then before_render += next_block + when "replace" + before_render += next_block[0] + after_render += next_block[1] + + return_dual_pane(before_render, after_render) + +diff_dual_pane = (before, after ) -> + return return_dual_pane(before, after) if before is after + + before = html_to_tokens before + after = html_to_tokens after + + ops = calculate_operations before, after + + render_operations_dual_pane before, after, ops + diff = (before, after)-> return before if before is after @@ -282,13 +335,14 @@ diff = (before, after)-> render_operations before, after, ops - diff.html_to_tokens = html_to_tokens diff.find_matching_blocks = find_matching_blocks find_matching_blocks.find_match = find_match find_matching_blocks.create_index = create_index diff.calculate_operations = calculate_operations diff.render_operations = render_operations +diff.render_operations_dual_pane = render_operations_dual_pane +diff.diff_dual_pane = diff_dual_pane if typeof define is 'function' define [], ()-> diff diff --git a/test/diff.spec.coffee b/test/diff.spec.coffee index 68d0cc9..724a515 100644 --- a/test/diff.spec.coffee +++ b/test/diff.spec.coffee @@ -15,3 +15,17 @@ describe 'Diff', -> it 'should mark the new letter', -> (expect @res).to.equal 'input 2' + + describe 'When dual pane is checked', -> + beforeEach -> + @res = @cut.diff_dual_pane 'input text', 'input text' + + it 'should should return the equivalent text', -> + (expect @res).eql { before: 'input text', after: 'input text' } + + describe 'When dual pane is checked', -> + beforeEach -> + @res = @cut.diff_dual_pane 'input text', 'input texts' + + it 'should should return the text', -> + (expect @res).eql { before: 'input text', after: 'input texts' } diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee index ac6c235..f1c001d 100644 --- 
a/test/html_to_tokens.spec.coffee +++ b/test/html_to_tokens.spec.coffee @@ -28,3 +28,7 @@ describe 'html_to_tokens', -> it 'should identify self closing tags as tokens', -> (expect @cut '

hello
goodbye

') .eql ['

', 'hello', '
', 'goodbye', '

'] + + it 'should identify scripts as single tokens', -> + (expect @cut '') + .eql [''] diff --git a/test/render_operations_dual_pane.spec.coffee b/test/render_operations_dual_pane.spec.coffee new file mode 100644 index 0000000..95b6712 --- /dev/null +++ b/test/render_operations_dual_pane.spec.coffee @@ -0,0 +1,67 @@ +describe 'render_operations_dual_pane', -> + beforeEach -> + diff = (require '../src/htmldiff.coffee') + @cut = (before, after)-> + ops = diff.calculate_operations before, after + diff.render_operations_dual_pane before, after, ops + + it 'should be a function', -> + (expect @cut).is.a 'function' + + describe 'equal', -> + beforeEach -> + before = ['this', ' ', 'is', ' ', 'a', ' ', 'test'] + @res = @cut before, before + + it 'should output the text', -> + (expect @res).eql { before: 'this is a test', after: 'this is a test' } + + describe 'insert', -> + beforeEach -> + before = ['this', ' ', 'is'] + after = ['this', ' ', 'is', ' ', 'a', ' ', 'test'] + @res = @cut before, after + + it 'should wrap in an ', -> + (expect @res).eql { before: "this is", after: "this is a test" } + + describe 'delete', -> + beforeEach -> + before = ['this', ' ', 'is', ' ', 'a', ' ', 'test', \ + ' ', 'of', ' ', 'stuff'] + after = ['this', ' ', 'is', ' ', 'a', ' ', 'test'] + @res = @cut before, after + + it 'should wrap in a ', -> + (expect @res).to.eql { before: "this is a test of stuff", after: "this is a test" } + + + describe 'replace', -> + beforeEach -> + before = ['this', ' ', 'is', ' ', 'a', ' ', 'break'] + after = ['this', ' ', 'is', ' ', 'a', ' ', 'test'] + @res = @cut before, after + + it 'should wrap in both and ', -> + (expect @res).to.eql { before: "this is a break", after: "this is a test" } + + describe 'Dealing with tags', -> + beforeEach -> + before = ['

', 'a', '

'] + after = ['

', 'a', ' ', 'b', '

', '

', 'c', '

'] + @res = @cut before, after + + it 'should make sure the tags are within the

tags', -> + (expect @res).to.eql { before: '

a

', after: '

a b

c

' } + + describe 'When there is a change at the beginning, in a

', -> + beforeEach -> + before = ['

', 'this', ' ', 'is', ' ', 'awesome', '

'] + after = ['

', 'I', ' ', 'is', ' ', 'awesome', '

'] + @res = @cut before, after + + it 'should keep the change inside the

', -> + (expect @res).to.eql({ + before: '

this is awesome

', + after: '

I is awesome

' + })