您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
improved word-based diff library with block move detection
当前为
// <syntaxhighlight lang="JavaScript"> // ==UserScript== // @name wDiff // @version 1.0.13 // @date September 10, 2014 // @description improved word-based diff library with block move detection // @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff // @source https://en.wikipedia.org/wiki/User:Cacycle/diff.js // @author Cacycle (https://en.wikipedia.org/wiki/User:Cacycle) // @license released into the public domain // @namespace https://en.wikipedia.org/wiki/User:Cacycle/ // ==/UserScript== /* Improved JavaScript diff library that returns html/css-formatted new text version with highlighted deletions, inserts, and block moves. It is compatible with all browsers and is not dependent on external libraries. An implementation of the word-based algorithm from: Communications of the ACM 21(4):264 (1978) http://doi.acm.org/10.1145/359460.359467 Additional features: * Word (token) types have been optimized for MediaWiki source texts * Resolution down to characters level * Highlighting of moved blocks and their original position marks * Stepwise split (paragraphs, sentences, words, chars) * Recursive diff * Additional post-pass-5 code for resolving islands caused by common tokens at the border of sequences of common tokens * Block move detection and visualization * Minimizing length of moved vs. static blocks * Bubbling up of ambiguous unresolved regions to next line break * Optional omission of unchanged irrelevant parts from the output * Fully customizable * Well commented and documented code This code is used by the MediaWiki in-browser text editors [[en:User:Cacycle/editor]] and [[en:User:Cacycle/wikEd]] and the enhanced diff view tool wikEdDiff [[en:User:Cacycle/wikEd]]. Usage: var diffHtml = wDiff.Diff(oldString, newString); diffHtml = wDiff.ShortenOutput(diffHtml); Datastructures (abbreviations from publication): text: objects for text related data .newText, new text .oldText: old text .string: new or old text to be diffed .tokens[]: token data list for new or old string (N and O) .prev: previous list item .next: next list item .token: token string .link: index of corresponding token in new or old text (OA and NA) .number: list enumeration number .parsed: token has been added to symbol table .unique: unique token in whole text .first: index of first token in tokens list .last: index of last token in tokens list .diff: diff html symbols: object for symbols table data .token[]: associative array (hash) of parsed tokens for passes 1 - 3, points to symbol[i] .symbol[]: array of objects that hold token counters and pointers: .newCount: new text token counter (NC) .oldCount: old text token counter (OC) .newToken: token index in text.newText.tokens .oldToken: token index in text.oldText.tokens .linked: flag: at least one unique token pair has been linked blocks[]: array of objects that holds block (consecutive text tokens) data in order of the new text .oldBlock: number of block in old text order .newBlock: number of block in new text order .oldNumber: old text token number of first token .newNumber: new text token number of first token .oldStart: old text token index of first token .count number of tokens .unique: contains unique matched token .words: word count .chars: char length .type: 'same', 'del', 'ins' .section: section number .group: group number of block .fixed: belongs to a fixed (not moved) group .string: string of block tokens groups[]: section blocks that are consecutive in old text .oldNumber: first block oldNumber .blockStart: first block index .blockEnd: last block index .unique: contains unique matched token .maxWords: word count of longest block .words: word count .chars: char count .fixed: not moved from original position .moved[]: list of groups that have been moved from this position .movedFrom: position this group has been moved from .color: color number of moved group .diff: group diff */ // JSHint options: W004: is already defined, W097: Use the function form of "use strict", W100: This character may get silently deleted by one or more browsers /* jshint -W004, -W097, -W100, newcap: false, browser: true, jquery: true, sub: true, bitwise: true, curly: true, evil: true, forin: true, freeze: true, immed: true, latedef: true, loopfunc: true, quotmark: single, undef: true */ /* global console */ // turn on ECMAScript 5 strict mode 'use strict'; // define global object var wDiff; if (wDiff === undefined) { wDiff = {}; } var WED; // // core diff settings // // enable block move layout with color coded blocks and marks at their original position if (wDiff.showBlockMoves === undefined) { wDiff.showBlockMoves = true; } // minimal number of real words for a moved block (0 for always showing color coded blocks) if (wDiff.blockMinLength === undefined) { wDiff.blockMinLength = 3; } // further resolve replacements character-wise from start and end if (wDiff.charDiff === undefined) { wDiff.charDiff = true; } // enable recursive diff to resolve problematic sequences if (wDiff.recursiveDiff === undefined) { wDiff.recursiveDiff = true; } // display blocks in different colors if (wDiff.coloredBlocks === undefined) { wDiff.coloredBlocks = false; } // UniCode letter support for regexps, from http://xregexp.com/addons/unicode/unicode-base.js v1.0.0 if (wDiff.letters === undefined) { wDiff.letters = 'a-zA-Z0-9' + '00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F151F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC41FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-211321152119-211D212421262128212A-212D212F-2139213C-213F2145-2149214E218321842C00-2C2E2C30-2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC'.replace(/(\w{4})/g, '\\u$1'); } // regExps for splitting text if (wDiff.regExpSplit === undefined) { wDiff.regExpSplit = { // paragraphs: after newline paragraph: /(.|\n)+?(?=(\n|$))/g, // sentences: after .spaces or before newline sentence: /\n|.*?\.( +|(?=\n))|.+?(?=\n)/g, // inline chunks // [[wiki link]] | {{template}} | [ext. link] |<html> | [[wiki link| | {{template| | url chunk: /\[\[[^\[\]\n]+\]\]|\{\{[^\{\}\n]+\}\}|\[[^\[\]\n]+\]|<[^<>\[\]\{\}\n]+>|\[\[[^\[\]\|\n]+\]\]\||\{\{[^\{\}\|\n]+\||\b((https?:|)\/\/)[^\x00-\x20\s"\[\]\x7f]+/g, // words, multi-char markup, and chars word: new RegExp('([' + wDiff.letters + '])+|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.', 'g'), // chars character: /./g }; } // regExps for bubbling up gaps if (wDiff.regExpBubbleStop === undefined) { wDiff.regExpBubbleStop = /\n$/; } if (wDiff.regExpBubbleClosing === undefined) { wDiff.regExpBubbleClosing = /^[\s)\]}>\-–—.,:;?!’\/\\=+]/; } // regExp for counting words if (wDiff.regExpWordCount === undefined) { wDiff.regExpWordCount = new RegExp('(^|[^' + wDiff.letters + '])[' + wDiff.letters + '][' + wDiff.letters + '_\'’]*', 'g'); } // regExp for wiki code non-letter characters if (wDiff.regExpWikiCodeChars === undefined) { wDiff.regExpWikiCodeChars = /^[ \t\n\[\]{}|+\-!*#:;=<>'\/_,.&?]+$/; } // // shorten output settings // // characters before diff tag to search for previous heading, paragraph, line break, cut characters if (wDiff.headingBefore === undefined) { wDiff.headingBefore = 1500; } if (wDiff.paragraphBeforeMax === undefined) { wDiff.paragraphBeforeMax = 1500; } if (wDiff.paragraphBeforeMin === undefined) { wDiff.paragraphBeforeMin = 500; } if (wDiff.lineBeforeMax === undefined) { wDiff.lineBeforeMax = 1000; } if (wDiff.lineBeforeMin === undefined) { wDiff.lineBeforeMin = 500; } if (wDiff.blankBeforeMax === undefined) { wDiff.blankBeforeMax = 1000; } if (wDiff.blankBeforeMin === undefined) { wDiff.blankBeforeMin = 500; } if (wDiff.charsBefore === undefined) { wDiff.charsBefore = 500; } // characters after diff tag to search for next heading, paragraph, line break, or characters if (wDiff.headingAfter === undefined) { wDiff.headingAfter = 1500; } if (wDiff.paragraphAfterMax === undefined) { wDiff.paragraphAfterMax = 1500; } if (wDiff.paragraphAfterMin === undefined) { wDiff.paragraphAfterMin = 500; } if (wDiff.lineAfterMax === undefined) { wDiff.lineAfterMax = 1000; } if (wDiff.lineAfterMin === undefined) { wDiff.lineAfterMin = 500; } if (wDiff.blankAfterMax === undefined) { wDiff.blankAfterMax = 1000; } if (wDiff.blankAfterMin === undefined) { wDiff.blankAfterMin = 500; } if (wDiff.charsAfter === undefined) { wDiff.charsAfter = 500; } // lines before and after diff tag to search for previous heading, paragraph, line break, cut characters if (wDiff.linesBeforeMax === undefined) { wDiff.linesBeforeMax = 10; } if (wDiff.linesAfterMax === undefined) { wDiff.linesAfterMax = 10; } // maximal fragment distance to join close fragments if (wDiff.fragmentJoinLines === undefined) { wDiff.fragmentJoinLines = 5; } if (wDiff.fragmentJoinChars === undefined) { wDiff.fragmentJoinChars = 1000; } // // css classes // if (wDiff.stylesheet === undefined) { wDiff.stylesheet = '.wDiffTab:before { content: "→"; color: #bbb; font-size: smaller; }' + '.wDiffNewline:before { content: " "; }' + '.wDiffMarkRight:before { content: "▶"; }' + '.wDiffMarkLeft:before { content: "◀"; }' + '.wDiffDelete { font-weight: bold; background-color: #ffe49c; color: #222; border-radius: 0.25em; padding: 0.2em 1px; }' + '.wDiffInsert { font-weight: bold; background-color: #bbddff; color: #222; border-radius: 0.25em; padding: 0.2em 1px; }' + '.wDiffBlockLeft { font-weight: bold; background-color: #e8e8e8; border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; }' + '.wDiffBlockRight { font-weight: bold; background-color: #e8e8e8; border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; }' + '.wDiffMarkLeft { font-weight: bold; background-color: #ffe49c; color: #666; border-radius: 0.25em; padding: 0.2em; margin: 0 1px; }' + '.wDiffMarkRight { font-weight: bold; background-color: #ffe49c; color: #666; border-radius: 0.25em; padding: 0.2em; margin: 0 1px; }' + '.wDiffFragment { white-space: pre-wrap; background: #fff; border: #bbb solid; border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0; }' + '.wDiffContainer { }' + '.wDiffNoChange { white-space: pre-wrap; background: #f0f0f0; border: #bbb solid; border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em; margin: 1em 0; }' + '.wDiffSeparator { margin-bottom: 1em; }' + '.wDiffBlock { }' + '.wDiffBlock0 { background-color: #ffff60; }' + '.wDiffBlock1 { background-color: #c0ff60; }' + '.wDiffBlock2 { background-color: #ffd8ff; }' + '.wDiffBlock3 { background-color: #a0ffff; }' + '.wDiffBlock4 { background-color: #ffe840; }' + '.wDiffBlock5 { background-color: #bbccff; }' + '.wDiffBlock6 { background-color: #ffaaff; }' + '.wDiffBlock7 { background-color: #ffbbbb; }' + '.wDiffBlock8 { background-color: #a0e8a0; }' + '.wDiffMark { }' + '.wDiffMark0 { color: #ffff60; }' + '.wDiffMark1 { color: #c0ff60; }' + '.wDiffMark2 { color: #ffd8ff; }' + '.wDiffMark3 { color: #a0ffff; }' + '.wDiffMark4 { color: #ffd840; }' + '.wDiffMark5 { color: #bbccff; }' + '.wDiffMark6 { color: #ff99ff; }' + '.wDiffMark7 { color: #ff9999; }' + '.wDiffMark8 { color: #90d090; }' + '.wDiffBlockHighlight { background-color: #777; color: #fff; border: solid #777; border-width: 1px 0; }' + '.wDiffMarkHighlight { background-color: #777; color: #fff; }'; } // // css styles // if (wDiff.styleContainer === undefined) { wDiff.styleContainer = ''; } if (wDiff.StyleDelete === undefined) { wDiff.styleDelete = ''; } if (wDiff.styleInsert === undefined) { wDiff.styleInsert = ''; } if (wDiff.styleBlockLeft === undefined) { wDiff.styleBlockLeft = ''; } if (wDiff.styleBlockRight === undefined) { wDiff.styleBlockRight = ''; } if (wDiff.styleBlockHighlight === undefined) { wDiff.styleBlockHighlight = ''; } if (wDiff.styleBlockColor === undefined) { wDiff.styleBlockColor = []; } if (wDiff.styleMarkLeft === undefined) { wDiff.styleMarkLeft = ''; } if (wDiff.styleMarkRight === undefined) { wDiff.styleMarkRight = ''; } if (wDiff.styleMarkColor === undefined) { wDiff.styleMarkColor = []; } if (wDiff.styleNewline === undefined) { wDiff.styleNewline = ''; } if (wDiff.styleTab === undefined) { wDiff.styleTab = ''; } if (wDiff.styleFragment === undefined) { wDiff.styleFragment = ''; } if (wDiff.styleNoChange === undefined) { wDiff.styleNoChange = ''; } if (wDiff.styleSeparator === undefined) { wDiff.styleSeparator = ''; } if (wDiff.styleOmittedChars === undefined) { wDiff.styleOmittedChars = ''; } // // html for core diff // // dynamic replacements: {block}: block number style, {mark}: mark number style, {class}: class number, {number}: block number, {title}: title attribute (popup) // class plus html comment are required indicators for wDiff.ShortenOutput() if (wDiff.blockEvent === undefined) { wDiff.blockEvent = ' onmouseover="wDiff.BlockHandler(undefined, this, \'mouseover\');"'; } if (wDiff.htmlContainerStart === undefined) { wDiff.htmlContainerStart = '<div class="wDiffContainer" style="' + wDiff.styleContainer + '">'; } if (wDiff.htmlContainerEnd === undefined) { wDiff.htmlContainerEnd = '</div>'; } if (wDiff.htmlDeleteStart === undefined) { wDiff.htmlDeleteStart = '<span class="wDiffDelete" style="' + wDiff.styleDelete + '" title="−">'; } if (wDiff.htmlDeleteEnd === undefined) { wDiff.htmlDeleteEnd = '</span><!--wDiffDelete-->'; } if (wDiff.htmlInsertStart === undefined) { wDiff.htmlInsertStart = '<span class="wDiffInsert" style="' + wDiff.styleInsert + '" title="+">'; } if (wDiff.htmlInsertEnd === undefined) { wDiff.htmlInsertEnd = '</span><!--wDiffInsert-->'; } if (wDiff.htmlBlockLeftStart === undefined) { wDiff.htmlBlockLeftStart = '<span class="wDiffBlockLeft wDiffBlock{class}" style="' + wDiff.styleBlockLeft + '{block}" title="▶ ▢" id="wDiffBlock{number}"' + wDiff.blockEvent + '>'; } if (wDiff.htmlBlockLeftEnd === undefined) { wDiff.htmlBlockLeftEnd = '</span><!--wDiffBlockLeft-->'; } if (wDiff.htmlBlockRightStart === undefined) { wDiff.htmlBlockRightStart = '<span class="wDiffBlockRight wDiffBlock{class}" style="' + wDiff.styleBlockRight + '{block}" title="▭ ◀" id="wDiffBlock{number}"' + wDiff.blockEvent + '>'; } if (wDiff.htmlBlockRightEnd === undefined) { wDiff.htmlBlockRightEnd = '</span><!--wDiffBlockRight-->'; } if (wDiff.htmlMarkRight === undefined) { wDiff.htmlMarkRight = '<span class="wDiffMarkRight wDiffMark{class}" style="' + wDiff.styleMarkRight + '{mark}"{title} id="wDiffMark{number}"' + wDiff.blockEvent + '></span><!--wDiffMarkRight-->'; } if (wDiff.htmlMarkLeft === undefined) { wDiff.htmlMarkLeft = '<span class="wDiffMarkLeft wDiffMark{class}" style="' + wDiff.styleMarkLeft + '{mark}"{title} id="wDiffMark{number}"' + wDiff.blockEvent + '></span><!--wDiffMarkLeft-->'; } if (wDiff.htmlNewline === undefined) { wDiff.htmlNewline = '<span class="wDiffNewline" style="' + wDiff.styleNewline + '"></span>\n'; } if (wDiff.htmlTab === undefined) { wDiff.htmlTab = '<span class="wDiffTab" style="' + wDiff.styleTab + '">\t</span>'; } // // html for shorten output // if (wDiff.htmlFragmentStart === undefined) { wDiff.htmlFragmentStart = '<pre class="wDiffFragment" style="' + wDiff.styleFragment + '">'; } if (wDiff.htmlFragmentEnd === undefined) { wDiff.htmlFragmentEnd = '</pre>'; } if (wDiff.htmlNoChange === undefined) { wDiff.htmlNoChange = '<pre class="wDiffFragment" style="' + wDiff.styleNoChange + '" title="="></pre>'; } if (wDiff.htmlSeparator === undefined) { wDiff.htmlSeparator = '<div class="wDiffSeparator" style="' + wDiff.styleSeparator + '"></div>'; } if (wDiff.htmlOmittedChars === undefined) { wDiff.htmlOmittedChars = '<span class="wDiffOmittedChars" style="' + wDiff.styleOmittedChars + '">…</span>'; } // // javascript handler for output code, compatible with IE 8 // // wDiff.BlockHandler: event handler for block and mark elements if (wDiff.BlockHandler === undefined) { wDiff.BlockHandler = function (event, element, type) { // IE compatibility if ( (event === undefined) && (window.event !== undefined) ) { event = window.event; } // get mark/block elements var number = element.id.replace(/\D/g, ''); var block = document.getElementById('wDiffBlock' + number); var mark = document.getElementById('wDiffMark' + number); // highlight corresponding mark/block pairs if (type == 'mouseover') { element.onmouseover = null; element.onmouseout = function (event) { wDiff.BlockHandler(event, element, 'mouseout'); }; element.onclick = function (event) { wDiff.BlockHandler(event, element, 'click'); }; block.className += ' wDiffBlockHighlight'; mark.className += ' wDiffMarkHighlight'; } // remove mark/block highlighting if ( (type == 'mouseout') || (type == 'click') ) { element.onmouseout = null; element.onmouseover = function (event) { wDiff.BlockHandler(event, element, 'mouseover'); }; // getElementsByClassName var container = element.parentNode; var spans = container.getElementsByTagName('span'); for (var i = 0; i < spans.length; i ++) { if ( ( (spans[i] != block) && (spans[i] != mark) ) || (type != 'click') ) { if (spans[i].className.indexOf(' wDiffBlockHighlight') != -1) { spans[i].className = spans[i].className.replace(/ wDiffBlockHighlight/g, ''); } else if (spans[i].className.indexOf(' wDiffMarkHighlight') != -1) { spans[i].className = spans[i].className.replace(/ wDiffMarkHighlight/g, ''); } } } } // scroll to corresponding mark/block element if (type == 'click') { // get corresponding element var corrElement; if (element == block) { corrElement = mark; } else { corrElement = block; } // getOffsetTop var corrElementPos = 0; var node = corrElement; do { corrElementPos += node.offsetTop; } while ( (node = node.offsetParent) !== null ); // scroll element under mouse cursor var top; if (window.pageXOffset !== undefined) { top = window.pageXOffset; } else { document.documentElement.scrollTop; } var cursor; if (event.pageY !== undefined) { cursor = event.pageY; } else if (event.clientY !== undefined) { cursor = event.clientY + top; } var line = 12; if (window.getComputedStyle !== undefined) { line = parseInt(window.getComputedStyle(corrElement).getPropertyValue('line-height')); } window.scroll(0, corrElementPos + top - cursor + line / 2); } return; }; } // // start of diff code // // wDiff.Init: initialize wDiff // called from: on code load // calls: wDiff.AddStyleSheet() wDiff.Init = function () { // compatibility fixes for old names of functions window.StringDiff = wDiff.Diff; window.WDiffString = wDiff.Diff; window.WDiffShortenOutput = wDiff.ShortenOutput; // shortcut to wikEd.Debug() if (WED === undefined) { if (typeof console == 'object') { WED = console.log; } else { WED = window.alert; } } // add styles to head wDiff.AddStyleSheet(wDiff.stylesheet); // add block handler to head if running under Greasemonkey if (typeof GM_info == 'object') { var script = 'var wDiff; if (wDiff === undefined) { wDiff = {}; } wDiff.BlockHandler = ' + wDiff.BlockHandler.toString(); wDiff.AddScript(script); } return; }; // wDiff.Diff: main method // input: oldString, newString, strings containing the texts to be diffed // called from: user code // calls: wDiff.Split(), wDiff.SplitRefine(), wDiff.CalculateDiff(), wDiff.DetectBlocks(), wDiff.AssembleDiff() // returns: diff html code, call wDiff.ShortenOutput() for shortening this output wDiff.Diff = function (oldString, newString) { var diff = ''; // IE / Mac fix oldString = oldString.replace(/\r\n?/g, '\n'); newString = newString.replace(/\r\n?/g, '\n'); // prepare text data object var text = { newText: { string: newString, tokens: [], first: null, last: null }, oldText: { string: oldString, tokens: [], first: null, last: null }, diff: '' }; // trap trivial changes: no change if (oldString == newString) { text.diff = wDiff.HtmlEscape(newString); wDiff.HtmlFormat(text); return text.diff; } // trap trivial changes: old text deleted if ( (oldString === null) || (oldString.length === 0) ) { text.diff = wDiff.htmlInsertStart + wDiff.HtmlEscape(newString) + wDiff.htmlInsertEnd; wDiff.HtmlFormat(text); return text.diff; } // trap trivial changes: new text deleted if ( (newString === null) || (newString.length === 0) ) { text.diff = wDiff.htmlDeleteStart + wDiff.HtmlEscape(oldString) + wDiff.htmlDeleteEnd; wDiff.HtmlFormat(text); return text.diff; } // new symbols object var symbols = { token: [], hash: {}, linked: false }; // split new and old text into paragraps wDiff.Split(text.newText, 'paragraph'); wDiff.Split(text.oldText, 'paragraph'); // calculate diff wDiff.CalculateDiff(text, symbols, 'paragraph'); // refine different paragraphs into sentences wDiff.SplitRefine(text.newText, 'sentence'); wDiff.SplitRefine(text.oldText, 'sentence'); // calculate refined diff wDiff.CalculateDiff(text, symbols, 'sentence'); // refine different paragraphs into chunks wDiff.SplitRefine(text.newText, 'chunk'); wDiff.SplitRefine(text.oldText, 'chunk'); // calculate refined diff wDiff.CalculateDiff(text, symbols, 'chunk'); // refine different sentences into words wDiff.SplitRefine(text.newText, 'word'); wDiff.SplitRefine(text.oldText, 'word'); // calculate refined diff information with recursion for unresolved gaps wDiff.CalculateDiff(text, symbols, 'word', true); // bubble up gaps wDiff.BubbleUpGaps(text.newText, text.oldText); wDiff.BubbleUpGaps(text.oldText, text.newText); // split tokens into chars in selected unresolved gaps if (wDiff.charDiff === true) { wDiff.SplitRefineChars(text); // calculate refined diff information with recursion for unresolved gaps wDiff.CalculateDiff(text, symbols, 'character', true); } // bubble up gaps wDiff.BubbleUpGaps(text.newText, text.oldText); wDiff.BubbleUpGaps(text.oldText, text.newText); // enumerate tokens lists wDiff.EnumerateTokens(text.newText); wDiff.EnumerateTokens(text.oldText); // detect moved blocks var blocks = []; var groups = []; wDiff.DetectBlocks(text, blocks, groups); // assemble diff blocks into formatted html text diff = wDiff.AssembleDiff(text, blocks, groups); return diff; }; // wDiff.Split: split text into paragraph, sentence, or word tokens // input: text (text.newText or text.oldText), object containing text data and strings; regExp, regular expression for splitting text into tokens; token, tokens index of token to be split // changes: text (text.newText or text.oldText): text.tokens list, text.first, text.last // called from: wDiff.Diff() wDiff.Split = function (text, level, token) { var prev = null; var next = null; var current = text.tokens.length; var first = current; var string = ''; // split full text or specified token if (token === undefined) { string = text.string; } else { prev = text.tokens[token].prev; next = text.tokens[token].next; string = text.tokens[token].token; } // split text into tokens, regExp match as separator var number = 0; var split = []; var regExpMatch; var lastIndex = 0; while ( (regExpMatch = wDiff.regExpSplit[level].exec(string)) !== null) { if (regExpMatch.index > lastIndex) { split.push(string.substring(lastIndex, regExpMatch.index)); } split.push(regExpMatch[0]); lastIndex = wDiff.regExpSplit[level].lastIndex; } if (lastIndex < string.length) { split.push(string.substring(lastIndex)); } // cycle trough new tokens for (var i = 0; i < split.length; i ++) { // insert current item, link to previous text.tokens[current] = { token: split[i], prev: prev, next: null, link: null, number: null, parsed: false, unique: false }; number ++; // link previous item to current if (prev !== null) { text.tokens[prev].next = current; } prev = current; current ++; } // connect last new item and existing next item if ( (number > 0) && (token !== undefined) ) { if (prev !== null) { text.tokens[prev].next = next; } if (next !== null) { text.tokens[next].prev = prev; } } // set text first and last token index if (number > 0) { // initial text split if (token === undefined) { text.first = 0; text.last = prev; } // first or last token has been split else { if (token == text.first) { text.first = first; } if (token == text.last) { text.last = prev; } } } return; }; // wDiff.SplitRefine: split unique unmatched tokens into smaller tokens // changes: text (text.newText or text.oldText) .tokens list // called from: wDiff.Diff() // calls: wDiff.Split() wDiff.SplitRefine = function (text, regExp) { // cycle through tokens list var i = text.first; while ( (i !== null) && (text.tokens[i] !== null) ) { // refine unique unmatched tokens into smaller tokens if (text.tokens[i].link === null) { wDiff.Split(text, regExp, i); } i = text.tokens[i].next; } return; }; // wDiff.SplitRefineChars: split tokens into chars in the following unresolved regions (gaps): // - one token became separated by space, dash, or any string // - same number of tokens in gap and strong similarity of all tokens: // - addition or deletion of flanking strings in tokens // - addition or deletion of internal string in tokens // - same length and at least 50 % identity // - same start or end, same text longer than different text // - same length and at least 50 % identity // identical tokens including space separators will be linked, resulting in word-wise char-level diffs // changes: text (text.newText or text.oldText) .tokens list // called from: wDiff.Diff() // calls: wDiff.Split() // steps: // find corresponding gaps // select gaps of identical token number and strong similarity in all tokens // refine words into chars in selected gaps wDiff.SplitRefineChars = function (text) { // // find corresponding gaps // // cycle trough new text tokens list var gaps = []; var gap = null; var i = text.newText.first; var j = text.oldText.first; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { // get token links var newLink = text.newText.tokens[i].link; var oldLink = null; if (j !== null) { oldLink = text.oldText.tokens[j].link; } // start of gap in new and old if ( (gap === null) && (newLink === null) && (oldLink === null) ) { gap = gaps.length; gaps.push({ newFirst: i, newLast: i, newTokens: 1, oldFirst: j, oldLast: j, oldTokens: null, charSplit: null }); } // count chars and tokens in gap else if ( (gap !== null) && (newLink === null) ) { gaps[gap].newLast = i; gaps[gap].newTokens ++; } // gap ended else if ( (gap !== null) && (newLink !== null) ) { gap = null; } // next list elements if (newLink !== null) { j = text.oldText.tokens[newLink].next; } i = text.newText.tokens[i].next; } // cycle trough gaps and add old text gap data for (var gap = 0; gap < gaps.length; gap ++) { // cycle trough old text tokens list var j = gaps[gap].oldFirst; while ( (j !== null) && (text.oldText.tokens[j] !== null) && (text.oldText.tokens[j].link === null) ) { // count old chars and tokens in gap gaps[gap].oldLast = j; gaps[gap].oldTokens ++; j = text.oldText.tokens[j].next; } } // // select gaps of identical token number and strong similarity of all tokens // for (var gap = 0; gap < gaps.length; gap ++) { var charSplit = true; // not same gap length if (gaps[gap].newTokens != gaps[gap].oldTokens) { // one word became separated by space, dash, or any string if ( (gaps[gap].newTokens == 1) && (gaps[gap].oldTokens == 3) ) { if (text.newText.tokens[ gaps[gap].newFirst ].token != text.oldText.tokens[ gaps[gap].oldFirst ].token + text.oldText.tokens[ gaps[gap].oldLast ].token ) { continue; } } else if ( (gaps[gap].oldTokens == 1) && (gaps[gap].newTokens == 3) ) { if (text.oldText.tokens[ gaps[gap].oldFirst ].token != text.newText.tokens[ gaps[gap].newFirst ].token + text.newText.tokens[ gaps[gap].newLast ].token ) { continue; } } else { continue; } } // cycle trough new text tokens list and set charSplit var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst; while (i !== null) { var newToken = text.newText.tokens[i].token; var oldToken = text.oldText.tokens[j].token; // get shorter and longer token var shorterToken; var longerToken; if (newToken.length < oldToken.length) { shorterToken = newToken; longerToken = oldToken; } else { shorterToken = oldToken; longerToken = newToken; } // not same token length if (newToken.length != oldToken.length) { // test for addition or deletion of internal string in tokens // find number of identical chars from left var left = 0; while (left < shorterToken.length) { if (newToken.charAt(left) != oldToken.charAt(left)) { break; } left ++; } // find number of identical chars from right var right = 0; while (right < shorterToken.length) { if (newToken.charAt(newToken.length - 1 - right) != oldToken.charAt(oldToken.length - 1 - right)) { break; } right ++; } // no simple insertion or deletion of internal string if (left + right != shorterToken.length) { // not addition or deletion of flanking strings in tokens (smaller token not part of larger token) if (longerToken.indexOf(shorterToken) == -1) { // same text at start or end shorter than different text if ( (left < shorterToken.length / 2) && (right < shorterToken.length / 2) ) { // do not split into chars this gap charSplit = false; break; } } } } // same token length else if (newToken != oldToken) { // tokens less than 50 % identical var ident = 0; for (var pos = 0; pos < shorterToken.length; pos ++) { if (shorterToken.charAt(pos) == longerToken.charAt(pos)) { ident ++; } } if (ident/shorterToken.length < 0.49) { // do not split into chars this gap charSplit = false; break; } } // next list elements if (i == gaps[gap].newLast) { break; } i = text.newText.tokens[i].next; j = text.oldText.tokens[j].next; } gaps[gap].charSplit = charSplit; } // // refine words into chars in selected gaps // for (var gap = 0; gap < gaps.length; gap ++) { if (gaps[gap].charSplit === true) { // cycle trough new text tokens list var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst; while (i !== null) { var newToken = text.newText.tokens[i].token; var oldToken = text.oldText.tokens[j].token; // link identical tokens (spaces) if (newToken == oldToken) { text.newText.tokens[i].link = j; text.oldText.tokens[j].link = i; } // refine different words into chars else { wDiff.Split(text.newText, 'character', i); wDiff.Split(text.oldText, 'character', j); } // next list elements if (i == gaps[gap].newLast) { break; } i = text.newText.tokens[i].next; j = text.oldText.tokens[j].next; } } } // WED('Gap', wDiff.DebugGaps(gaps)); return; }; // wDiff.BubbleUpGaps: move gaps with ambiguous identical fronts and backs up // start ambiguous gap borders after line breaks and text section closing characters // changes: text (text.newText or text.oldText) .tokens list // called from: wDiff.Diff() wDiff.BubbleUpGaps = function (text, textLinked) { // cycle through tokens list var i = text.first; var gapStart = null; while ( (i !== null) && (text.tokens[i] !== null) ) { // remember gap start if ( (gapStart === null) && (text.tokens[i].link === null) ) { gapStart = i; } // find gap end else if ( (gapStart !== null) && (text.tokens[i].link !== null) ) { // bubble down as deep as possible var front = gapStart; var back = i; while ( (front !== null) && (back !== null) && (text.tokens[front].link === null) && (text.tokens[back].link !== null) && (text.tokens[front].token === text.tokens[back].token) ) { text.tokens[front].link = text.tokens[back].link; textLinked.tokens[ text.tokens[front].link ].link = front; text.tokens[back].link = null; front = text.tokens[front].next; back = text.tokens[back].next; } // test baloon up, remember last line break or closing text var frontStop = null; front = text.tokens[front].prev; back = text.tokens[back].prev; var frontTest = front; var backTest = back; while ( (frontTest !== null) && (backTest !== null) && (text.tokens[frontTest].link !== null) && (text.tokens[backTest].link === null) && (text.tokens[frontTest].token == text.tokens[backTest].token) ) { frontTest = text.tokens[frontTest].prev; backTest = text.tokens[backTest].prev; if (wDiff.regExpBubbleStop.test(text.tokens[frontTest].token) === true) { frontStop = frontTest; break; } else if ( (frontStop === null) && (wDiff.regExpBubbleClosing.test(text.tokens[frontTest].token) === true) ) { frontStop = frontTest; } } // actually bubble up to last line break or closing text if (frontStop !== null) { while ( (front !== null) && (back !== null) && (front !== frontStop) && (text.tokens[front].link !== null) && (text.tokens[back].link === null) && (text.tokens[front].token == text.tokens[back].token) ) { text.tokens[back].link = text.tokens[front].link; textLinked.tokens[ text.tokens[back].link ].link = back; text.tokens[front].link = null; front = text.tokens[front].prev; back = text.tokens[back].prev; } } gapStart = null; } i = text.tokens[i].next; } return; }; // wDiff.EnumerateTokens: enumerate text token list // changes: text (text.newText or text.oldText) .tokens list // called from: wDiff.Diff() wDiff.EnumerateTokens = function (text) { // enumerate tokens list var number = 0; var i = text.first; while ( (i !== null) && (text.tokens[i] !== null) ) { text.tokens[i].number = number; number ++; i = text.tokens[i].next; } return; }; // wDiff.CalculateDiff: calculate diff information, can be called repeatedly during refining // input: text: object containing text data and tokens; level: 'paragraph', 'sentence', 'word', or 'character' // optionally for recursive calls: newStart, newEnd, oldStart, oldEnd (tokens list indexes), recursionLevel // changes: text.oldText/newText.tokens[].link, links corresponding tokens from old and new text // steps: // pass 1: parse new text into symbol table // pass 2: parse old text into symbol table // pass 3: connect unique matched tokens // pass 4: connect adjacent identical tokens downwards // pass 5: connect adjacent identical tokens upwards // recursively diff still unresolved regions downwards // recursively diff still unresolved regions upwards wDiff.CalculateDiff = function (text, symbols, level, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) { // set defaults if (newStart === undefined) { newStart = text.newText.first; } if (newEnd === undefined) { newEnd = text.newText.last; } if (oldStart === undefined) { oldStart = text.oldText.first; } if (oldEnd === undefined) { oldEnd = text.oldText.last; } if (recursionLevel === undefined) { recursionLevel = 0; } // limit recursion depth if (recursionLevel > 10) { return; } // // pass 1: parse new text into symbol table // // cycle trough new text tokens list var i = newStart; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { // add new entry to symbol table var token = text.newText.tokens[i].token; if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) { var current = symbols.token.length; symbols.hash[token] = current; symbols.token[current] = { newCount: 1, oldCount: 0, newToken: i, oldToken: null }; } // or update existing entry else { // increment token counter for new text var hashToArray = symbols.hash[token]; symbols.token[hashToArray].newCount ++; } // next list element if (i == newEnd) { break; } i = text.newText.tokens[i].next; } // // pass 2: parse old text into symbol table // // cycle trough old text tokens list var j = oldStart; while ( (j !== null) && (text.oldText.tokens[j] !== null) ) { // add new entry to symbol table var token = text.oldText.tokens[j].token; if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) { var current = symbols.token.length; symbols.hash[token] = current; symbols.token[current] = { newCount: 0, oldCount: 1, newToken: null, oldToken: j }; } // or update existing entry else { // increment token counter for old text var hashToArray = symbols.hash[token]; symbols.token[hashToArray].oldCount ++; // add token number for old text symbols.token[hashToArray].oldToken = j; } // next list element if (j === oldEnd) { break; } j = text.oldText.tokens[j].next; } // // pass 3: connect unique tokens // // cycle trough symbol array for (var i = 0; i < symbols.token.length; i ++) { // find tokens in the symbol table that occur only once in both versions if ( (symbols.token[i].newCount == 1) && (symbols.token[i].oldCount == 1) ) { var newToken = symbols.token[i].newToken; var oldToken = symbols.token[i].oldToken; // do not use spaces as unique markers if (/^\s+$/.test(text.newText.tokens[newToken].token) === false) { // connect from new to old and from old to new if (text.newText.tokens[newToken].link === null) { text.newText.tokens[newToken].link = oldToken; text.oldText.tokens[oldToken].link = newToken; symbols.linked = true; if ( (level != 'character') && (recursionLevel === 0) ) { text.newText.tokens[newToken].unique = true; text.oldText.tokens[oldToken].unique = true; } } } } } // // pass 4: connect adjacent identical tokens downwards // // cycle trough new text tokens list if (symbols.linked === true) { var i = text.newText.first; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { var iNext = text.newText.tokens[i].next; // find already connected pairs var j = text.newText.tokens[i].link; if (j !== null) { var jNext = text.oldText.tokens[j].next; // check if the following tokens are not yet connected if ( (iNext !== null) && (jNext !== null) ) { if ( (text.newText.tokens[iNext].link === null) && (text.oldText.tokens[jNext].link === null) ) { // connect if the following tokens are the same if (text.newText.tokens[iNext].token == text.oldText.tokens[jNext].token) { text.newText.tokens[iNext].link = jNext; text.oldText.tokens[jNext].link = iNext; } } } } i = iNext; } // // pass 5: connect adjacent identical tokens upwards // // cycle trough new text tokens list var i = text.newText.last; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { var iNext = text.newText.tokens[i].prev; // find already connected pairs var j = text.newText.tokens[i].link; if (j !== null) { var jNext = text.oldText.tokens[j].prev; // check if the preceeding tokens are not yet connected if ( (iNext !== null) && (jNext !== null) ) { if ( (text.newText.tokens[iNext].link === null) && (text.oldText.tokens[jNext].link === null) ) { // connect if the preceeding tokens are the same if (text.newText.tokens[iNext].token == text.oldText.tokens[jNext].token) { text.newText.tokens[iNext].link = jNext; text.oldText.tokens[jNext].link = iNext; } } } } i = iNext; } // refine by recursively diffing unresolved regions caused by addition of common tokens around sequences of common tokens, only at word level split if ( (recurse === true) && (wDiff.recursiveDiff === true) ) { // // recursively diff still unresolved regions downwards // // cycle trough new text tokens list var i = newStart; var j = oldStart; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { // get j from previous tokens match var iPrev = text.newText.tokens[i].prev; if (iPrev !== null) { var jPrev = text.newText.tokens[iPrev].link; if (jPrev !== null) { j = text.oldText.tokens[jPrev].next; } } // check for the start of an unresolved sequence if ( (j !== null) && (text.oldText.tokens[j] !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) ) { // determine the limits of of the unresolved new sequence var iStart = i; var iEnd = null; var iLength = 0; var iNext = i; while ( (iNext !== null) && (text.newText.tokens[iNext].link === null) ) { iEnd = iNext; iLength ++; if (iEnd == newEnd) { break; } iNext = text.newText.tokens[iNext].next; } // determine the limits of of the unresolved old sequence var jStart = j; var jEnd = null; var jLength = 0; var jNext = j; while ( (jNext !== null) && (text.oldText.tokens[jNext].link === null) ) { jEnd = jNext; jLength ++; if (jEnd == oldEnd) { break; } jNext = text.oldText.tokens[jNext].next; } // recursively diff the unresolved sequence if ( (iLength > 1) || (jLength > 1) ) { // new symbols object for sub-region var symbolsRecurse = { token: [], hash: {}, linked: false }; wDiff.CalculateDiff(text, symbolsRecurse, level, true, iStart, iEnd, jStart, jEnd, recursionLevel + 1); } i = iEnd; } // next list element if (i == newEnd) { break; } i = text.newText.tokens[i].next; } // // recursively diff still unresolved regions upwards // // cycle trough new text tokens list var i = newEnd; var j = oldEnd; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { // get j from next matched tokens var iPrev = text.newText.tokens[i].next; if (iPrev !== null) { var jPrev = text.newText.tokens[iPrev].link; if (jPrev !== null) { j = text.oldText.tokens[jPrev].prev; } } // check for the start of an unresolved sequence if ( (j !== null) && (text.oldText.tokens[j] !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) ) { // determine the limits of of the unresolved new sequence var iStart = null; var iEnd = i; var iLength = 0; var iNext = i; while ( (iNext !== null) && (text.newText.tokens[iNext].link === null) ) { iStart = iNext; iLength ++; if (iStart == newStart) { break; } iNext = text.newText.tokens[iNext].prev; } // determine the limits of of the unresolved old sequence var jStart = null; var jEnd = j; var jLength = 0; var jNext = j; while ( (jNext !== null) && (text.oldText.tokens[jNext].link === null) ) { jStart = jNext; jLength ++; if (jStart == oldStart) { break; } jNext = text.oldText.tokens[jNext].prev; } // recursively diff the unresolved sequence if ( (iLength > 1) || (jLength > 1) ) { // new symbols object for sub-region var symbolsRecurse = { token: [], hash: {}, linked: false }; wDiff.CalculateDiff(text, symbolsRecurse, level, true, iStart, iEnd, jStart, jEnd, recursionLevel + 1); } i = iStart; } // next list element if (i == newStart) { break; } i = text.newText.tokens[i].prev; } } } return; }; // wDiff.DetectBlocks: extract block data for inserted, deleted, or moved blocks from diff data in text object // input: // text: object containing text tokens list // blocks: empty array for block data // groups: empty array for group data // changes: text, blocks, groups // called from: wDiff.Diff() // scheme of blocks, sections, and groups (old block numbers): // old: 1 2 3D4 5E6 7 8 9 10 11 // | ‾/-/_ X | >|< | // new: 1 I 3D4 2 E6 5 N 7 10 9 8 11 // section: 0 0 0 1 1 2 2 2 // group: 0 10 111 2 33 4 11 5 6 7 8 9 // fixed: + +++ - ++ - + + - - + // type: = + =-= = -= = + = = = = = wDiff.DetectBlocks = function (text, blocks, groups) { // WED('text.oldText', wDiff.DebugText(text.oldText)); // WED('text.newText', wDiff.DebugText(text.newText)); // collect identical corresponding ('same') blocks from old text and sort by new text wDiff.GetSameBlocks(text, blocks); // collect independent block sections (no old/new crosses outside section) for per-section determination of non-moving (fixed) groups var sections = []; wDiff.GetSections(blocks, sections); // find groups of continuous old text blocks wDiff.GetGroups(blocks, groups); // set longest sequence of increasing groups in sections as fixed (not moved) wDiff.SetFixed(blocks, groups, sections); // collect deletion ('del') blocks from old text wDiff.GetDelBlocks(text, blocks); // position 'del' blocks into new text order wDiff.PositionDelBlocks(blocks); // sort blocks by new text token number and update groups wDiff.SortBlocks(blocks, groups); // convert groups to insertions/deletions if maximal block length is too short if (wDiff.blockMinLength > 0) { var unlinked = wDiff.UnlinkBlocks(text, blocks, groups); // repeat from start after conversion if (unlinked === true) { wDiff.GetSameBlocks(text, blocks); wDiff.GetSections(blocks, sections); wDiff.GetGroups(blocks, groups); wDiff.SetFixed(blocks, groups, sections); wDiff.GetDelBlocks(text, blocks); wDiff.PositionDelBlocks(blocks); } } // collect insertion ('ins') blocks from new text wDiff.GetInsBlocks(text, blocks); // sort blocks by new text token number and update groups wDiff.SortBlocks(blocks, groups); // set group numbers of 'ins' and 'del' blocks wDiff.SetInsDelGroups(blocks, groups); // mark original positions of moved groups wDiff.MarkMoved(groups); // set moved block colors wDiff.ColorMoved(groups); // WED('Groups', wDiff.DebugGroups(groups)); // WED('Blocks', wDiff.DebugBlocks(blocks)); return; }; // wDiff.GetSameBlocks: collect identical corresponding ('same') blocks from old text and sort by new text // called from: DetectBlocks() // changes: creates blocks wDiff.GetSameBlocks = function (text, blocks) { // clear blocks array blocks.splice(0); // cycle through old text to find matched (linked) blocks var j = text.oldText.first; var i = null; while (j !== null) { // skip 'del' blocks while ( (j !== null) && (text.oldText.tokens[j].link === null) ) { j = text.oldText.tokens[j].next; } // get 'same' block if (j !== null) { i = text.oldText.tokens[j].link; var iStart = i; var jStart = j; // detect matching blocks ('same') var count = 0; var unique = false; var chars = 0; var string = ''; while ( (i !== null) && (j !== null) && (text.oldText.tokens[j].link == i) ) { var token = text.oldText.tokens[j].token; count ++; unique = unique || text.newText.tokens[i].unique; chars += token.length; string += token; i = text.newText.tokens[i].next; j = text.oldText.tokens[j].next; } // save old text 'same' block blocks.push({ oldBlock: blocks.length, newBlock: null, oldNumber: text.oldText.tokens[jStart].number, newNumber: text.newText.tokens[iStart].number, oldStart: jStart, count: count, unique: unique, words: wDiff.WordCount(string), chars: chars, type: 'same', section: null, group: null, fixed: null, string: string }); } } // sort blocks by new text token number blocks.sort(function(a, b) { return a.newNumber - b.newNumber; }); // number blocks in new text order for (var block = 0; block < blocks.length; block ++) { blocks[block].newBlock = block; } return; }; // wDiff.GetSections: collect independent block sections (no old/new crosses outside section) for per-section determination of non-moving (fixed) groups // called from: DetectBlocks() // changes: creates sections, blocks[].section wDiff.GetSections = function (blocks, sections) { // clear sections array sections.splice(0); // cycle through blocks for (var block = 0; block < blocks.length; block ++) { var sectionStart = block; var sectionEnd = block; var oldMax = blocks[sectionStart].oldNumber; var sectionOldMax = oldMax; // check right for (var j = sectionStart + 1; j < blocks.length; j ++) { // check for crossing over to the left if (blocks[j].oldNumber > oldMax) { oldMax = blocks[j].oldNumber; } else if (blocks[j].oldNumber < sectionOldMax) { sectionEnd = j; sectionOldMax = oldMax; } } // save crossing sections if (sectionEnd > sectionStart) { // save section to block for (var i = sectionStart; i <= sectionEnd; i ++) { blocks[i].section = sections.length; } // save section sections.push({ blockStart: sectionStart, blockEnd: sectionEnd, deleted: false }); block = sectionEnd; } } return; }; // wDiff.GetGroups: find groups of continuous old text blocks // called from: DetectBlocks() // changes: creates groups, blocks[].group wDiff.GetGroups = function (blocks, groups) { // clear groups array groups.splice(0); // cycle through blocks for (var block = 0; block < blocks.length; block ++) { if (blocks[block].deleted === true) { continue; } var groupStart = block; var groupEnd = block; var oldBlock = blocks[groupStart].oldBlock; // get word and char count of block var words = wDiff.WordCount(blocks[block].string); var maxWords = words; var unique = false; var chars = blocks[block].chars; // check right for (var i = groupEnd + 1; i < blocks.length; i ++) { // check for crossing over to the left if (blocks[i].oldBlock != oldBlock + 1) { break; } oldBlock = blocks[i].oldBlock; // get word and char count of block if (blocks[i].words > maxWords) { maxWords = blocks[i].words; } unique = unique || blocks[i].unique; words += blocks[i].words; chars += blocks[i].chars; groupEnd = i; } // save crossing group if (groupEnd >= groupStart) { // set groups outside sections as fixed var fixed = false; if (blocks[groupStart].section === null) { fixed = true; } // save group to block for (var i = groupStart; i <= groupEnd; i ++) { blocks[i].group = groups.length; blocks[i].fixed = fixed; } // save group groups.push({ oldNumber: blocks[groupStart].oldNumber, blockStart: groupStart, blockEnd: groupEnd, unique: unique, maxWords: maxWords, words: words, chars: chars, fixed: fixed, moved: [], movedFrom: null, color: null, diff: '' }); block = groupEnd; } } return; }; // wDiff.SetFixed: set longest sequence of increasing groups in sections as fixed (not moved) // called from: DetectBlocks() // calls: wDiff.FindMaxPath() // changes: groups[].fixed, blocks[].fixed wDiff.SetFixed = function (blocks, groups, sections) { // cycle through sections for (var section = 0; section < sections.length; section ++) { var blockStart = sections[section].blockStart; var blockEnd = sections[section].blockEnd; var groupStart = blocks[blockStart].group; var groupEnd = blocks[blockEnd].group; // recusively find path of groups in increasing old group order with longest char length // start at each group of section var cache = []; var maxChars = 0; var maxPath = null; for (var i = groupStart; i <= groupEnd; i ++) { var pathObj = wDiff.FindMaxPath(i, [], 0, cache, groups, groupEnd); if (pathObj.chars > maxChars) { maxPath = pathObj.path; maxChars = pathObj.chars; } } // mark fixed groups for (var i = 0; i < maxPath.length; i ++) { var group = maxPath[i]; groups[group].fixed = true; // mark fixed blocks for (var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++) { blocks[block].fixed = true; } } } return; }; // wDiff.FindMaxPath: recusively find path of groups in increasing old group order with longest char length // input: start, path start group; path, array of path groups; chars, char count of path; cache, cached sub-path lengths; groups, groups, group object; groupEnd, last group // returns: returnObj, contains path and length // called from: wDiff.SetFixed() // calls: itself recursively wDiff.FindMaxPath = function (start, path, chars, cache, groups, groupEnd) { // add current path point var pathLocal = path.slice(); pathLocal.push(start); chars = chars + groups[start].chars; // last group, terminate recursion var returnObj = { path: pathLocal, chars: chars }; if (start == groupEnd) { return returnObj; } // find longest sub-path var maxChars = 0; var oldNumber = groups[start].oldNumber; for (var i = start + 1; i <= groupEnd; i ++) { // only in increasing old group order if (groups[i].oldNumber < oldNumber) { continue; } // get longest sub-path from cache if (cache[start] !== undefined) { returnObj = cache[start]; } // get longest sub-path by recursion else { var pathObj = wDiff.FindMaxPath(i, pathLocal, chars, cache, groups, groupEnd); // select longest sub-path if (pathObj.chars > maxChars) { returnObj = pathObj; } } } // save longest path to cache if (cache[i] === undefined) { cache[start] = returnObj; } return returnObj; }; // wDiff.GetDelBlocks: collect deletion ('del') blocks from old text // called from: DetectBlocks() // changes: blocks wDiff.GetDelBlocks = function (text, blocks) { // cycle through old text to find matched (linked) blocks var j = text.oldText.first; var i = null; while (j !== null) { // collect 'del' blocks var oldStart = j; var count = 0; var string = ''; while ( (j !== null) && (text.oldText.tokens[j].link === null) ) { count ++; string += text.oldText.tokens[j].token; j = text.oldText.tokens[j].next; } // save old text 'del' block if (count !== 0) { blocks.push({ oldBlock: null, newBlock: null, oldNumber: text.oldText.tokens[oldStart].number, newNumber: null, oldStart: oldStart, count: count, unique: false, words: null, chars: null, type: 'del', section: null, group: null, fixed: null, string: string }); } // skip 'same' block if (j !== null) { i = text.oldText.tokens[j].link; while ( (i !== null) && (j !== null) && (text.oldText.tokens[j].link == i) ) { i = text.newText.tokens[i].next; j = text.oldText.tokens[j].next; } } } return; }; // wDiff.PositionDelBlocks: position 'del' blocks into new text order // called from: DetectBlocks() // changes: blocks[].section/group/fixed/newNumber // // deletion blocks move with fixed neighbor (new number +/- 0.1): // old: 1 D 2 1 D 2 // / / \ / \ \ // new: 1 D 2 1 D 2 // fixed: * * // new number: 1 1.1 1.9 2 wDiff.PositionDelBlocks = function (blocks) { // sort shallow copy of blocks by oldNumber var blocksOld = blocks.slice(); blocksOld.sort(function(a, b) { return a.oldNumber - b.oldNumber; }); // cycle through 'del' blocks in old text order for (var blockOld = 0; blockOld < blocksOld.length; blockOld ++) { var delBlock = blocksOld[blockOld]; if (delBlock.type != 'del') { continue; } // get old text prev block var prevBlock; if (blockOld > 0) { prevBlock = blocks[ blocksOld[blockOld - 1].newBlock ]; } // get old text next block var nextBlock; if (blockOld < blocksOld.length - 1) { nextBlock = blocks[ blocksOld[blockOld + 1].newBlock ]; } // move after prev block if fixed var neighbor; if ( (prevBlock !== undefined) && (prevBlock.fixed === true) ) { neighbor = prevBlock; delBlock.newNumber = neighbor.newNumber + 0.1; } // move before next block if fixed else if ( (nextBlock !== undefined) && (nextBlock.fixed === true) ) { neighbor = nextBlock; delBlock.newNumber = neighbor.newNumber - 0.1; } // move after prev block if existent else if (prevBlock !== undefined) { neighbor = prevBlock; delBlock.newNumber = neighbor.newNumber + 0.1; } // move before next block else if (nextBlock !== undefined) { neighbor = nextBlock; delBlock.newNumber = neighbor.newNumber - 0.1; } // move before first block else { delBlock.newNumber = -0.1; } // update 'del' block with neighbor data if (neighbor !== undefined) { delBlock.section = neighbor.section; delBlock.group = neighbor.group; delBlock.fixed = neighbor.fixed; } } return; }; // wDiff.UnlinkBlocks: convert 'same' blocks in groups into 'ins'/'del' pairs if too short // called from: DetectBlocks() // changes: text.newText/oldText[].link // returns: true if text tokens were unlinked wDiff.UnlinkBlocks = function (text, blocks, groups) { var unlinked = false; // cycle through groups for (var group = 0; group < groups.length; group ++) { var blockStart = groups[group].blockStart; var blockEnd = groups[group].blockEnd; // no block in group is at least blockMinLength words long if (groups[group].maxWords < wDiff.blockMinLength) { // unlink whole moved group if it contains no unique matched token if ( (groups[group].fixed === false) && (groups[group].unique === false) ) { for (var block = blockStart; block <= blockEnd; block ++) { if (blocks[block].type == 'same') { wDiff.UnlinkSingleBlock(blocks[block], text); unlinked = true; } } } // unlink block flanks else { // unlink from start if preceded by 'del' for (var block = blockStart; block <= blockEnd; block ++) { if ( (block > 0) && (blocks[block - 1].type == 'del') && (blocks[block].type == 'same') ) { // stop unlinking if more than one word or a unique word if ( (blocks[block].words > 1) || ( (blocks[block].words == 1) && (blocks[block].unique === true) ) ) { break; } wDiff.UnlinkSingleBlock(blocks[block], text); unlinked = true; blockStart = block; } } // unlink from end if followed by 'del' for (var block = blockEnd; block > blockStart; block --) { if ( (blockEnd < blocks.length - 1) && (blocks[block + 1].type == 'del') && (blocks[block].type == 'same') ) { // stop unlinking if more than one word or a unique word if ( (blocks[block].words > 1) || ( (blocks[block].words == 1) && (blocks[block].unique === true) ) ) { break; } wDiff.UnlinkSingleBlock(blocks[block], text); unlinked = true; } } } } } return unlinked; }; // wDiff.UnlinkBlock: un-link text tokens of single block, converting them into 'ins'/'del' pairs // called from: wDiff.UnlinkBlocks() // changes: text.newText/oldText[].link wDiff.UnlinkSingleBlock = function (block, text) { // cycle through old text var j = block.oldStart; for (var count = 0; count < block.count; count ++) { // unlink tokens text.newText.tokens[ text.oldText.tokens[j].link ].link = null; text.oldText.tokens[j].link = null; j = text.oldText.tokens[j].next; } return; }; // wDiff.GetInsBlocks: collect insertion ('ins') blocks from new text // called from: DetectBlocks() // changes: blocks wDiff.GetInsBlocks = function (text, blocks) { // cycle through new text to find insertion blocks var i = text.newText.first; while (i !== null) { // jump over linked (matched) block while ( (i !== null) && (text.newText.tokens[i].link !== null) ) { i = text.newText.tokens[i].next; } // detect insertion blocks ('ins') if (i !== null) { var iStart = i; var count = 0; var string = ''; while ( (i !== null) && (text.newText.tokens[i].link === null) ) { count ++; string += text.newText.tokens[i].token; i = text.newText.tokens[i].next; } // save new text 'ins' block blocks.push({ oldBlock: null, newBlock: null, oldNumber: null, newNumber: text.newText.tokens[iStart].number, oldStart: null, count: count, unique: false, words: null, chars: null, type: 'ins', section: null, group: null, fixed: null, string: string }); } } return; }; // wDiff.SortBlocks: sort blocks by new text token number and update groups // called from: DetectBlocks() // changes: blocks wDiff.SortBlocks = function (blocks, groups) { // sort by newNumber blocks.sort(function(a, b) { return a.newNumber - b.newNumber; }); // cycle through blocks and update groups with new block numbers var group = null; for (var block = 0; block < blocks.length; block ++) { var blockGroup = blocks[block].group; if (blockGroup !== null) { if (blockGroup != group) { group = blocks[block].group; groups[group].blockStart = block; groups[group].oldNumber = blocks[block].oldNumber; } groups[blockGroup].blockEnd = block; } } return; }; // wDiff.SetInsDelGroups: set group numbers of 'ins' and 'del' blocks // called from: DetectBlocks() // changes: groups, blocks[].fixed/group wDiff.SetInsDelGroups = function (blocks, groups) { // set group numbers of 'ins' and 'del' blocks inside existing groups for (var group = 0; group < groups.length; group ++) { var fixed = groups[group].fixed; for (var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++) { if (blocks[block].group === null) { blocks[block].group = group; blocks[block].fixed = fixed; } } } // add remaining 'ins' and 'del' blocks to groups // cycle through blocks for (var block = 0; block < blocks.length; block ++) { // skip existing groups if (blocks[block].group === null) { blocks[block].group = groups.length; var fixed = blocks[block].fixed; // save group groups.push({ oldNumber: blocks[block].oldNumber, blockStart: block, blockEnd: block, unique: false, maxWords: null, words: null, chars: null, fixed: fixed, moved: [], movedFrom: null, color: null, diff: '' }); } } return; }; // wDiff.MarkMoved: mark original positions of moved groups // called from: DetectBlocks() // changes: groups[].moved/movedFrom // moved block marks at original positions relative to fixed groups: // groups: 3 7 // 1 <| | (no next smaller fixed) // 5 |< | // |> 5 | // | 5 <| // | >| 5 // | |> 9 (no next larger fixed) // fixed: * * // mark direction: groups[movedGroup].blockStart < groups[group].blockStart // group side: groups[movedGroup].oldNumber < groups[group].oldNumber wDiff.MarkMoved = function (groups) { // cycle through groups (moved group) for (var movedGroup = 0; movedGroup < groups.length; movedGroup ++) { if (groups[movedGroup].fixed !== false) { continue; } var movedOldNumber = groups[movedGroup].oldNumber; // find closest fixed groups var nextSmallerNumber = null; var nextSmallerGroup = null; var nextLargerNumber = null; var nextLargerGroup = null; // cycle through groups (original positions) for (var group = 0; group < groups.length; group ++) { if ( (groups[group].fixed !== true) || (group == movedGroup) ) { continue; } // find fixed group with closest smaller oldNumber var oldNumber = groups[group].oldNumber; if ( (oldNumber < movedOldNumber) && ( (nextSmallerNumber === null) || (oldNumber > nextSmallerNumber) ) ) { nextSmallerNumber = oldNumber; nextSmallerGroup = group; } // find fixed group with closest larger oldNumber if ( (oldNumber > movedOldNumber) && ( (nextLargerNumber === null) || (oldNumber < nextLargerNumber) ) ) { nextLargerNumber = oldNumber; nextLargerGroup = group; } } // no larger fixed group, moved right var movedFrom = ''; if (nextLargerGroup === null) { movedFrom = 'left'; } // no smaller fixed group, moved right else if (nextSmallerGroup === null) { movedFrom = 'right'; } // group moved from between two closest fixed neighbors, moved left or right depending on char distance else { var rightChars = 0; for (var group = nextSmallerGroup + 1; group < movedGroup; group ++) { rightChars += groups[group].chars; } var leftChars = 0; for (var group = movedGroup + 1; group < nextLargerGroup; group ++) { leftChars += groups[group].chars; } // moved right if (rightChars <= leftChars) { movedFrom = 'left'; } // moved left else { movedFrom = 'right'; } } // check for null-moves if (movedFrom == 'left') { if (groups[nextSmallerGroup].blockEnd + 1 != groups[movedGroup].blockStart) { groups[nextSmallerGroup].moved.push(movedGroup); groups[movedGroup].movedFrom = nextSmallerGroup; } } else if (movedFrom == 'right') { if (groups[movedGroup].blockEnd + 1 != groups[nextLargerGroup].blockStart) { groups[nextLargerGroup].moved.push(movedGroup); groups[movedGroup].movedFrom = nextLargerGroup; } } } // cycle through groups, sort blocks moved from here by old number for (var group = 0; group < groups.length; group ++) { var moved = groups[group].moved; if (moved !== null) { moved.sort(function(a, b) { return groups[a].oldNumber - groups[b].oldNumber; }); } } return; }; // wDiff.ColorMoved: set moved block color numbers // called from: DetectBlocks() // changes: groups[].color wDiff.ColorMoved = function (groups) { // cycle through groups var moved = []; for (var group = 0; group < groups.length; group ++) { moved = moved.concat(groups[group].moved); } // sort moved array by old number moved.sort(function(a, b) { return groups[a].oldNumber - groups[b].oldNumber; }); // set color var color = 0; for (var i = 0; i < moved.length; i ++) { var movedGroup = moved[i]; if (wDiff.showBlockMoves === true) { groups[movedGroup].color = color; color ++; } } return; }; // wDiff.AssembleDiff: process diff data into formatted html text // input: text, object containing text tokens list; blocks, array containing block type; groups, array containing fixed (not moved), color, and moved mark data // returns: diff html string // called from: wDiff.Diff() // calls: wDiff.HtmlCustomize(), wDiff.HtmlFormat() wDiff.AssembleDiff = function (text, blocks, groups) { // // create group diffs // // cycle through groups for (var group = 0; group < groups.length; group ++) { var color = groups[group].color; var blockStart = groups[group].blockStart; var blockEnd = groups[group].blockEnd; var diff = ''; // check for colored block and move direction var blockFrom = null; if (color !== null) { if (groups[ groups[group].movedFrom ].blockStart < blockStart) { blockFrom = 'left'; } else { blockFrom = 'right'; } } // add colored block start markup if (blockFrom == 'left') { diff += wDiff.HtmlCustomize(wDiff.htmlBlockLeftStart, color); } else if (blockFrom == 'right') { diff += wDiff.HtmlCustomize(wDiff.htmlBlockRightStart, color); } // cycle through blocks for (var block = blockStart; block <= blockEnd; block ++) { var type = blocks[block].type; var string = blocks[block].string; // html escape text string string = wDiff.HtmlEscape(string); // add 'same' (unchanged) text if (type == 'same') { if (color !== null) { string = string.replace(/\n/g, wDiff.htmlNewline); } diff += string; } // add 'del' text else if (type == 'del') { string = string.replace(/\n/g, wDiff.htmlNewline); diff += wDiff.htmlDeleteStart + string + wDiff.htmlDeleteEnd; } // add 'ins' text else if (type == 'ins') { string = string.replace(/\n/g, wDiff.htmlNewline); diff += wDiff.htmlInsertStart + string + wDiff.htmlInsertEnd; } } // add colored block end markup if (blockFrom == 'left') { diff += wDiff.htmlBlockLeftEnd; } else if (blockFrom == 'right') { diff += wDiff.htmlBlockRightEnd; } groups[group].diff = diff; } // // mark original block positions // // cycle through groups for (var group = 0; group < groups.length; group ++) { var moved = groups[group].moved; // cycle through list of groups moved from here var leftMarks = ''; var rightMarks = ''; for (var i = 0; i < moved.length; i ++) { var movedGroup = moved[i]; var markColor = groups[movedGroup].color; var mark = ''; // get moved block text var movedText = ''; for (var block = groups[movedGroup].blockStart; block <= groups[movedGroup].blockEnd; block ++) { if (blocks[block].type != 'ins') { movedText += blocks[block].string; } } // display as deletion at original position if (wDiff.showBlockMoves === false) { mark = wDiff.htmlDeleteStart + wDiff.HtmlEscape(movedText) + wDiff.htmlDeleteEnd; } // get mark direction else { if (groups[movedGroup].blockStart < groups[group].blockStart) { mark = wDiff.htmlMarkLeft; } else { mark = wDiff.htmlMarkRight; } mark = wDiff.HtmlCustomize(mark, markColor, movedText); } // get side of group to mark if (groups[movedGroup].oldNumber < groups[group].oldNumber) { leftMarks += mark; } else { rightMarks += mark; } } groups[group].diff = leftMarks + groups[group].diff + rightMarks; } // // join diffs // // make shallow copy of groups and sort by blockStart var groupsSort = groups.slice(); groupsSort.sort(function(a, b) { return a.blockStart - b.blockStart; }); // cycle through sorted groups and assemble diffs for (var group = 0; group < groupsSort.length; group ++) { text.diff += groupsSort[group].diff; } // WED('Groups', wDiff.DebugGroups(groups)); // keep newlines and multiple spaces wDiff.HtmlFormat(text); // WED('text.diff', text.diff); return text.diff; }; // // wDiff.HtmlCustomize: customize move indicator html: {block}: block number style, {mark}: mark number style, {class}: class number, {number}: block number, {title}: title attribute (popup) // input: text (html or css code) // returns: customized text // called from: wDiff.AssembleDiff() wDiff.HtmlCustomize = function (text, number, title) { if (wDiff.coloredBlocks === true) { var blockStyle = wDiff.styleBlockColor[number]; if (blockStyle === undefined) { blockStyle = ''; } var markStyle = wDiff.styleMarkColor[number]; if (markStyle === undefined) { markStyle = ''; } text = text.replace(/\{block\}/g, ' ' + blockStyle); text = text.replace(/\{mark\}/g, ' ' + markStyle); text = text.replace(/\{class\}/g, number); } else { text = text.replace(/\{block\}|\{mark\}|\{class\}/g, ''); } text = text.replace(/\{number\}/g, number); // shorten title text, replace {title} if ( (title !== undefined) && (title !== '') ) { var max = 512; var end = 128; var gapMark = ' [...] '; if (title.length > max) { title = title.substr(0, max - gapMark.length - end) + gapMark + title.substr(title.length - end); } title = wDiff.HtmlEscape(title); title = title.replace(/\t/g, ' '); title = title.replace(/ /g, ' '); text = text.replace(/\{title\}/, ' title="' + title + '"'); } else { text = text.replace(/\{title\}/, ''); } return text; }; // // wDiff.HtmlEscape: replace html-sensitive characters in output text with character entities // input: text // returns: escaped text // called from: wDiff.Diff(), wDiff.AssembleDiff() wDiff.HtmlEscape = function (text) { text = text.replace(/&/g, '&'); text = text.replace(/</g, '<'); text = text.replace(/>/g, '>'); text = text.replace(/"/g, '"'); return (text); }; // // wDiff.HtmlFormat: tidy html, keep newlines and multiple spaces, add container // changes: text.diff // called from: wDiff.Diff(), wDiff.AssembleDiff() wDiff.HtmlFormat = function (text) { text.diff = text.diff.replace(/<\/(\w+)><!--wDiff(Delete|Insert)--><\1\b[^>]*\bclass="wDiff\2"[^>]*>/g, ''); text.diff = text.diff.replace(/\t/g, wDiff.htmlTab); text.diff = wDiff.htmlContainerStart + wDiff.htmlFragmentStart + text.diff + wDiff.htmlFragmentEnd + wDiff.htmlContainerEnd; return; }; // wDiff.ShortenOutput: shorten diff html by removing unchanged parts // input: diff html string from wDiff.Diff() // returns: shortened html with removed unchanged passages indicated by (...) or separator wDiff.ShortenOutput = function (html) { var diff = ''; // empty text if ( (html === undefined) || (html === '') ) { return ''; } // remove container by non-regExp replace html = html.replace(wDiff.htmlContainerStart, ''); html = html.replace(wDiff.htmlFragmentStart, ''); html = html.replace(wDiff.htmlFragmentEnd, ''); html = html.replace(wDiff.htmlContainerEnd, ''); // scan for diff html tags var regExpDiff = /<\w+\b[^>]*\bclass="[^">]*?\bwDiff(MarkLeft|MarkRight|BlockLeft|BlockRight|Delete|Insert)\b[^">]*"[^>]*>(.|\n)*?<!--wDiff\1-->/g; var tagsStart = []; var tagsEnd = []; var i = 0; var regExpMatch; // save tag positions while ( (regExpMatch = regExpDiff.exec(html)) !== null ) { // combine consecutive diff tags if ( (i > 0) && (tagsEnd[i - 1] == regExpMatch.index) ) { tagsEnd[i - 1] = regExpMatch.index + regExpMatch[0].length; } else { tagsStart[i] = regExpMatch.index; tagsEnd[i] = regExpMatch.index + regExpMatch[0].length; i ++; } } // no diff tags detected if (tagsStart.length === 0) { return wDiff.htmlNoChange; } // define regexps var regExpLine = /^(\n+|.)|(\n+|.)$|\n+/g; var regExpHeading = /(^|\n)(<[^>]+>)*(==+.+?==+|\{\||\|\}).*?\n?/g; var regExpParagraph = /^(\n\n+|.)|(\n\n+|.)$|\n\n+/g; var regExpBlank = /(<[^>]+>)*\s+/g; // get line positions var regExpMatch; var lines = []; while ( (regExpMatch = regExpLine.exec(html)) !== null) { lines.push(regExpMatch.index); } // get heading positions var headings = []; var headingsEnd = []; while ( (regExpMatch = regExpHeading.exec(html)) !== null ) { headings.push(regExpMatch.index); headingsEnd.push(regExpMatch.index + regExpMatch[0].length); } // get paragraph positions var paragraphs = []; while ( (regExpMatch = regExpParagraph.exec(html)) !== null ) { paragraphs.push(regExpMatch.index); } // determine fragment border positions around diff tags var lineMaxBefore = 0; var headingBefore = 0; var paragraphBefore = 0; var lineBefore = 0; var lineMaxAfter = 0; var headingAfter = 0; var paragraphAfter = 0; var lineAfter = 0; var rangeStart = []; var rangeEnd = []; var rangeStartType = []; var rangeEndType = []; // cycle through diff tag start positions for (var i = 0; i < tagsStart.length; i ++) { var tagStart = tagsStart[i]; var tagEnd = tagsEnd[i]; // maximal lines to search before diff tag var rangeStartMin = 0; for (var j = lineMaxBefore; j < lines.length - 1; j ++) { if (tagStart < lines[j + 1]) { if (j - wDiff.linesBeforeMax >= 0) { rangeStartMin = lines[j - wDiff.linesBeforeMax]; } lineMaxBefore = j; break; } } // find last heading before diff tag if (rangeStart[i] === undefined) { for (var j = headingBefore; j < headings.length - 1; j ++) { if (headings[j] > tagStart) { break; } if (headings[j + 1] > tagStart) { if ( (headings[j] > tagStart - wDiff.headingBefore) && (headings[j] > rangeStartMin) ) { rangeStart[i] = headings[j]; rangeStartType[i] = 'heading'; headingBefore = j; } break; } } } // find last paragraph before diff tag if (rangeStart[i] === undefined) { for (var j = paragraphBefore; j < paragraphs.length - 1; j ++) { if (paragraphs[j] > tagStart) { break; } if (paragraphs[j + 1] > tagStart - wDiff.paragraphBeforeMin) { if ( (paragraphs[j] > tagStart - wDiff.paragraphBeforeMax) && (paragraphs[j] > rangeStartMin) ) { rangeStart[i] = paragraphs[j]; rangeStartType[i] = 'paragraph'; paragraphBefore = j; } break; } } } // find last line break before diff tag if (rangeStart[i] === undefined) { for (var j = lineBefore; j < lines.length - 1; j ++) { if (lines[j + 1] > tagStart - wDiff.lineBeforeMin) { if ( (lines[j] > tagStart - wDiff.lineBeforeMax) && (lines[j] > rangeStartMin) ) { rangeStart[i] = lines[j]; rangeStartType[i] = 'line'; lineBefore = j; } break; } } } // find last blank before diff tag if (rangeStart[i] === undefined) { var lastPos = tagStart - wDiff.blankBeforeMax; if (lastPos < rangeStartMin) { lastPos = rangeStartMin; } regExpBlank.lastIndex = lastPos; while ( (regExpMatch = regExpBlank.exec(html)) !== null ) { if (regExpMatch.index > tagStart - wDiff.blankBeforeMin) { rangeStart[i] = lastPos; rangeStartType[i] = 'blank'; break; } lastPos = regExpMatch.index; } } // fixed number of chars before diff tag if (rangeStart[i] === undefined) { if (tagStart - wDiff.charsBefore > rangeStartMin) { rangeStart[i] = tagStart - wDiff.charsBefore; rangeStartType[i] = 'chars'; } } // fixed number of lines before diff tag if (rangeStart[i] === undefined) { rangeStart[i] = rangeStartMin; rangeStartType[i] = 'lines'; } // maximal lines to search after diff tag var rangeEndMax = html.length; for (var j = lineMaxAfter; j < lines.length; j ++) { if (lines[j] > tagEnd) { if (j + wDiff.linesAfterMax < lines.length) { rangeEndMax = lines[j + wDiff.linesAfterMax]; } lineMaxAfter = j; break; } } // find first heading after diff tag if (rangeEnd[i] === undefined) { for (var j = headingAfter; j < headingsEnd.length; j ++) { if (headingsEnd[j] > tagEnd) { if ( (headingsEnd[j] < tagEnd + wDiff.headingAfter) && (headingsEnd[j] < rangeEndMax) ) { rangeEnd[i] = headingsEnd[j]; rangeEndType[i] = 'heading'; paragraphAfter = j; } break; } } } // find first paragraph after diff tag if (rangeEnd[i] === undefined) { for (var j = paragraphAfter; j < paragraphs.length; j ++) { if (paragraphs[j] > tagEnd + wDiff.paragraphAfterMin) { if ( (paragraphs[j] < tagEnd + wDiff.paragraphAfterMax) && (paragraphs[j] < rangeEndMax) ) { rangeEnd[i] = paragraphs[j]; rangeEndType[i] = 'paragraph'; paragraphAfter = j; } break; } } } // find first line break after diff tag if (rangeEnd[i] === undefined) { for (var j = lineAfter; j < lines.length; j ++) { if (lines[j] > tagEnd + wDiff.lineAfterMin) { if ( (lines[j] < tagEnd + wDiff.lineAfterMax) && (lines[j] < rangeEndMax) ) { rangeEnd[i] = lines[j]; rangeEndType[i] = 'line'; lineAfter = j; } break; } } } // find blank after diff tag if (rangeEnd[i] === undefined) { regExpBlank.lastIndex = tagEnd + wDiff.blankAfterMin; if ( (regExpMatch = regExpBlank.exec(html)) !== null ) { if ( (regExpMatch.index < tagEnd + wDiff.blankAfterMax) && (regExpMatch.index < rangeEndMax) ) { rangeEnd[i] = regExpMatch.index; rangeEndType[i] = 'blank'; } } } // fixed number of chars after diff tag if (rangeEnd[i] === undefined) { if (tagEnd + wDiff.charsAfter < rangeEndMax) { rangeEnd[i] = tagEnd + wDiff.charsAfter; rangeEndType[i] = 'chars'; } } // fixed number of lines after diff tag if (rangeEnd[i] === undefined) { rangeEnd[i] = rangeEndMax; rangeEndType[i] = 'lines'; } } // remove overlaps, join close fragments var fragmentStart = []; var fragmentEnd = []; var fragmentStartType = []; var fragmentEndType = []; fragmentStart[0] = rangeStart[0]; fragmentEnd[0] = rangeEnd[0]; fragmentStartType[0] = rangeStartType[0]; fragmentEndType[0] = rangeEndType[0]; var j = 1; for (var i = 1; i < rangeStart.length; i ++) { // get lines between fragments var lines = 0; if (fragmentEnd[j - 1] < rangeStart[i]) { var join = html.substring(fragmentEnd[j - 1], rangeStart[i]); lines = (join.match(/\n/g) || []).length; } if ( (rangeStart[i] > fragmentEnd[j - 1] + wDiff.fragmentJoinChars) || (lines > wDiff.fragmentJoinLines) ) { fragmentStart[j] = rangeStart[i]; fragmentEnd[j] = rangeEnd[i]; fragmentStartType[j] = rangeStartType[i]; fragmentEndType[j] = rangeEndType[i]; j ++; } else { fragmentEnd[j - 1] = rangeEnd[i]; fragmentEndType[j - 1] = rangeEndType[i]; } } // assemble the fragments for (var i = 0; i < fragmentStart.length; i ++) { // get text fragment var fragment = html.substring(fragmentStart[i], fragmentEnd[i]); fragment = fragment.replace(/^\n+|\n+$/g, ''); // add inline marks for omitted chars and words if (fragmentStart[i] > 0) { if (fragmentStartType[i] == 'chars') { fragment = wDiff.htmlOmittedChars + fragment; } else if (fragmentStartType[i] == 'blank') { fragment = wDiff.htmlOmittedChars + ' ' + fragment; } } if (fragmentEnd[i] < html.length) { if (fragmentStartType[i] == 'chars') { fragment = fragment + wDiff.htmlOmittedChars; } else if (fragmentStartType[i] == 'blank') { fragment = fragment + ' ' + wDiff.htmlOmittedChars; } } // remove leading and trailing empty lines fragment = fragment.replace(/^\n+|\n+$/g, ''); // add fragment separator if (i > 0) { diff += wDiff.htmlSeparator; } // encapsulate span errors diff += wDiff.htmlFragmentStart + fragment + wDiff.htmlFragmentEnd; } // add to container diff = wDiff.htmlContainerStart + diff + wDiff.htmlContainerEnd; // WED('diff', diff); return diff; }; // // wDiff.AddScript: add script to head // wDiff.AddScript = function (code) { var script = document.createElement('script'); script.id = 'wDiffBlockHandler'; if (script.innerText !== undefined) { script.innerText = code; } else { script.textContent = code; } document.getElementsByTagName('head')[0].appendChild(script); return; }; // // wDiff.AddStyleSheet: add CSS rules to new style sheet, cross-browser >= IE6 // wDiff.AddStyleSheet = function (css) { var style = document.createElement('style'); style.type = 'text/css'; if (style.styleSheet !== undefined) { style.styleSheet.cssText = css; } else { style.appendChild( document.createTextNode(css) ); } document.getElementsByTagName('head')[0].appendChild(style); return; }; // // wDiff.WordCount: count words in string // wDiff.WordCount = function (string) { return (string.match(wDiff.regExpWordCount) || []).length; }; // // wDiff.DebugText: dump text (text.oldText or text.newText) object // wDiff.DebugText = function (text) { var dump = 'first: ' + text.first + '\tlast: ' + text.last + '\n'; dump += '\ni \tlink \t(prev \tnext) \t#num \t"token"\n'; var i = text.first; while ( (i !== null) && (text.tokens[i] !== null) ) { dump += i + ' \t' + text.tokens[i].link + ' \t(' + text.tokens[i].prev + ' \t' + text.tokens[i].next + ') \t#' + text.tokens[i].number + ' \t' + wDiff.DebugShortenString(text.tokens[i].token) + '\n'; i = text.tokens[i].next; } return dump; }; // // wDiff.DebugBlocks: dump blocks object // wDiff.DebugBlocks = function (blocks) { var dump = '\ni \toldBl \tnewBl \toldNm \tnewNm \toldSt \tcount \tuniq \twords \tchars \ttype \tsect \tgroup \tfixed \tstring\n'; for (var i = 0; i < blocks.length; i ++) { dump += i + ' \t' + blocks[i].oldBlock + ' \t' + blocks[i].newBlock + ' \t' + blocks[i].oldNumber + ' \t' + blocks[i].newNumber + ' \t' + blocks[i].oldStart + ' \t' + blocks[i].count + ' \t' + blocks[i].unique + ' \t' + blocks[i].words + ' \t' + blocks[i].chars + ' \t' + blocks[i].type + ' \t' + blocks[i].section + ' \t' + blocks[i].group + ' \t' + blocks[i].fixed + ' \t' + wDiff.DebugShortenString(blocks[i].string) + '\n'; } return dump; }; // // wDiff.DebugGroups: dump groups object // wDiff.DebugGroups = function (groups) { var dump = '\ni \tblSta \tblEnd \tuniq \tmaxWo \twords \tchars \tfixed \toldNm \tmFrom \tcolor \tmoved \tdiff\n'; for (var i = 0; i < groups.length; i ++) { dump += i + ' \t' + groups[i].blockStart + ' \t' + groups[i].blockEnd + ' \t' + groups[i].unique + ' \t' + groups[i].maxWords + ' \t' + groups[i].words + ' \t' + groups[i].chars + ' \t' + groups[i].fixed + ' \t' + groups[i].oldNumber + ' \t' + groups[i].movedFrom + ' \t' + groups[i].color + ' \t' + groups[i].moved.toString() + ' \t' + wDiff.DebugShortenString(groups[i].diff) + '\n'; } return dump; }; // // wDiff.DebugGaps: dump gaps object // wDiff.DebugGaps = function (gaps) { var dump = '\ni \tnFirs \tnLast \tnTok \toFirs \toLast \toTok \tcharSplit\n'; for (var i = 0; i < gaps.length; i ++) { dump += i + ' \t' + gaps[i].newFirst + ' \t' + gaps[i].newLast + ' \t' + gaps[i].newTokens + ' \t' + gaps[i].oldFirst + ' \t' + gaps[i].oldLast + ' \t' + gaps[i].oldTokens + ' \t' + gaps[i].charSplit + '\n'; } return dump; }; // // wDiff.DebugShortenString: shorten string for debugging // wDiff.DebugShortenString = function (string) { if (string === null) { return 'null'; } string = string.replace(/\n/g, '\\n'); var max = 100; if (string.length > max) { string = string.substr(0, max - 1 - 30) + '…' + string.substr(string.length - 30); } return '"' + string + '"'; }; // initialize wDiff wDiff.Init(); // </syntaxhighlight>
QingJ © 2025
镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址