5 * Copyright 2006 Google Inc.
6 * http://code.google.com/p/google-diff-match-patch/
8 * php port by Tobias Buschor shwups.ch
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
24 * @fileoverview Computes the difference between two texts to create a patch.
25 * Applies the patch onto another text, allowing for errors.
26 * @author fraser@google.com (Neil Fraser)
30 * Class containing the diff, match and patch methods.
33 class diff_match_patch
{
36 // Redefine these in your program to override the defaults.
38 // Number of seconds to map a diff before giving up (0 for infinity).
39 public $Diff_Timeout = 1.0;
40 // Cost of an empty edit operation in terms of edit characters.
41 public $Diff_EditCost = 4;
42 // The size beyond which the double-ended diff activates.
43 // Double-ending is twice as fast, but less accurate.
44 public $Diff_DualThreshold = 32;
45 // At what point is no match declared (0.0 = perfection, 1.0 = very loose).
46 public $Match_Threshold = 0.5;
47 // How far to search for a match (0 = exact location, 1000+ = broad match).
48 // A match this many characters away from the expected location will add
49 // 1.0 to the score (0.0 is a perfect match).
50 public $Match_Distance = 1000;
51 // When deleting a large block of text (over ~64 characters), how close does
52 // the contents have to match the expected contents. (0.0 = perfection,
53 // 1.0 = very loose). Note that Match_Threshold controls how closely the
54 // end points of a delete need to match.
55 public $Patch_DeleteThreshold = 0.5;
56 // Chunk size for context length.
57 public $Patch_Margin = 4;
60 * Compute the number of bits in an int.
61 * The normal answer for JavaScript is 32.
62 * @return {number} Max bits
64 function getMaxBits() {
68 while (oldi != newi) {
75 // How many bits in a number?
76 this.Match_MaxBits = getMaxBits();
81 * Find the differences between two texts. Simplifies the problem by stripping
82 * any common prefix or suffix off the texts before diffing.
83 * @param {string} text1 Old string to be diffed.
84 * @param {string} text2 New string to be diffed.
85 * @param {boolean} opt_checklines Optional speedup flag. If present and false,
86 * then don't run a line-level diff first to identify the changed areas.
87 * Defaults to true, which does a faster, slightly less optimal diff
88 * @return {Array.<Array.<number|string>>} Array of diff tuples.
90 function diff_main($text1, $text2, $checklines = true) {
91 // Check for equality (speedup)
92 if ($text1 === $text2) {
93 return array ( array ( DIFF_EQUAL
, $text1) );
96 // Trim off common prefix (speedup)
97 $commonlength = $this->diff_commonPrefix($text1, $text2);
98 $commonprefix = mb_substr($text1, 0, $commonlength);
99 $text1 = mb_substr($text1, $commonlength);
100 $text2 = mb_substr($text2, $commonlength);
102 // Trim off common suffix (speedup)
103 $commonlength = $this->diff_commonSuffix($text1, $text2);
104 $commonsuffix = mb_substr($text1, mb_strlen($text1) - $commonlength);
105 $text1 = mb_substr($text1, 0, mb_strlen($text1) - $commonlength);
106 $text2 = mb_substr($text2, 0, mb_strlen($text2) - $commonlength);
108 // Compute the diff on the middle block
109 $diffs = $this->diff_compute($text1, $text2, $checklines);
111 // Restore the prefix and suffix
112 if ($commonprefix !== '') {
113 array_unshift($diffs, array ( DIFF_EQUAL
, $commonprefix ));
115 if ($commonsuffix !== '') {
116 array_push($diffs, array ( DIFF_EQUAL
, $commonsuffix ));
118 $this->diff_cleanupMerge($diffs);
123 * Find the differences between two texts. Assumes that the texts do not
124 * have any common prefix or suffix.
125 * @param {string} text1 Old string to be diffed.
126 * @param {string} text2 New string to be diffed.
127 * @param {boolean} checklines Speedup flag. If false, then don't run a
128 * line-level diff first to identify the changed areas.
129 * If true, then run a faster, slightly less optimal diff
130 * @return {Array.<Array.<number|string>>} Array of diff tuples.
133 function diff_compute($text1, $text2, $checklines) {
136 // Just add some text (speedup)
137 return array ( array ( DIFF_INSERT
, $text2 ) );
141 // Just delete some text (speedup)
142 return array ( array ( DIFF_DELETE
, $text1 ) );
145 $longtext = mb_strlen($text1) > mb_strlen($text2) ?
$text1 : $text2;
146 $shorttext = mb_strlen($text1) > mb_strlen($text2) ?
$text2 : $text1;
147 $i = mb_strpos($longtext, $shorttext);
149 // Shorter text is inside the longer text (speedup)
151 array ( DIFF_INSERT
, mb_substr($longtext, 0, $i) ),
152 array ( DIFF_EQUAL
, $shorttext ),
153 array ( DIFF_INSERT
, mb_substr($longtext, $i +
mb_strlen($shorttext)) )
156 // Swap insertions for deletions if diff is reversed.
157 if (mb_strlen($text1) > mb_strlen($text2)) {
158 $diffs[0][0] = $diffs[2][0] = DIFF_DELETE
;
162 $longtext = $shorttext = null; // Garbage collect
164 // Check to see if the problem can be split in two.
165 $hm = $this->diff_halfMatch($text1, $text2);
167 // A half-match was found, sort out the return data.
172 $mid_common = $hm[4];
173 // Send both pairs off for separate processing.
174 $diffs_a = $this->diff_main($text1_a, $text2_a, $checklines);
175 $diffs_b = $this->diff_main($text1_b, $text2_b, $checklines);
176 // Merge the results.
177 return array_merge($diffs_a, array (
185 // Perform a real diff.
186 if ($checklines && (mb_strlen($text1) < 100 ||
mb_strlen($text2) < 100)) {
187 // Too trivial for the overhead.
192 // Scan the text on a line-by-line basis first.
193 $a = $this->diff_linesToChars($text1, $text2);
198 $diffs = $this->diff_map($text1, $text2);
200 // No acceptable result.
213 // Convert the diff back to original text.
214 $this->diff_charsToLines($diffs, $linearray);
215 // Eliminate freak matches (e.g. blank lines)
216 $this->diff_cleanupSemantic($diffs);
218 // Rediff any replacement blocks, this time character-by-character.
219 // Add a dummy entry at the end.
220 array_push($diffs, array (
229 while ($pointer < count($diffs)) {
230 switch ($diffs[$pointer][0]) {
233 $text_insert .= $diffs[$pointer][1];
237 $text_delete .= $diffs[$pointer][1];
240 // Upon reaching an equality, check for prior redundancies.
241 if ($count_delete >= 1 && $count_insert >= 1) {
242 // Delete the offending records and add the merged ones.
243 $a = $this->diff_main($text_delete, $text_insert, false);
244 array_splice($diffs, $pointer - $count_delete - $count_insert, $count_delete +
$count_insert);
246 $pointer = $pointer - $count_delete - $count_insert;
247 for ($j = count($a) - 1; $j >= 0; $j--) {
248 array_splice($diffs, $pointer, 0, array($a[$j]));
250 $pointer = $pointer +
count($a);
260 array_pop($diffs); // Remove the dummy entry at the end.
266 * Split two texts into an array of strings. Reduce the texts to a string of
267 * hashes where each Unicode character represents one line.
268 * @param {string} text1 First string.
269 * @param {string} text2 Second string.
270 * @return {Array.<string|Array.<string>>} Three element Array, containing the
271 * encoded text1, the encoded text2 and the array of unique strings. The
272 * zeroth element of the array of unique strings is intentionally blank.
275 function diff_linesToChars($text1, $text2) {
276 $lineArray = array(); // e.g. lineArray[4] == 'Hello\n'
277 $lineHash = array(); // e.g. lineHash['Hello\n'] == 4
279 // '\x00' is a valid character, but various debuggers don't like it.
280 // So we'll insert a junk entry to avoid generating a null character.
283 $chars1 = $this->diff_linesToCharsMunge($text1, $lineArray, $lineHash);
284 $chars2 = $this->diff_linesToCharsMunge($text2, $lineArray, $lineHash);
293 * Split a text into an array of strings. Reduce the texts to a string of
294 * hashes where each Unicode character represents one line.
295 * Modifies linearray and linehash through being a closure.
296 * @param {string} text String to encode
297 * @return {string} Encoded string
300 function diff_linesToCharsMunge($text, &$lineArray, &$lineHash) {
302 // Walk the text, pulling out a mb_substring for each line.
303 // text.split('\n') would would temporarily double our memory footprint.
304 // Modifying text would create many large strings to garbage collect.
307 // Keeping our own length variable is faster than looking it up.
308 $lineArrayLength = count($lineArray);
309 while ($lineEnd < mb_strlen($text) - 1) {
310 $lineEnd = mb_strpos($text, "\n", $lineStart);
311 if ($lineEnd === false) {
312 $lineEnd = mb_strlen($text) - 1;
314 $line = mb_substr($text, $lineStart, $lineEnd +
1 -$lineStart);
315 $lineStart = $lineEnd +
1;
317 if ( isset($lineHash[$line]) ) {
318 $chars .= mb_chr($lineHash[$line]);
320 $chars .= mb_chr($lineArrayLength);
321 $lineHash[$line] = $lineArrayLength;
322 $lineArray[$lineArrayLength++
] = $line;
328 * Rehydrate the text in a diff from a string of line hashes to real lines of
330 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
331 * @param {Array.<string>} lineArray Array of unique strings.
334 function diff_charsToLines(&$diffs, $lineArray) {
335 for ($x = 0; $x < count($diffs); $x++
) {
336 $chars = $diffs[$x][1];
338 for ($y = 0; $y < mb_strlen($chars); $y++
) {
339 $text[$y] = $lineArray[charCodeAt($chars, $y)];
341 $diffs[$x][1] = implode('',$text);
346 * Explore the intersection points between the two texts.
347 * @param {string} text1 Old string to be diffed.
348 * @param {string} text2 New string to be diffed.
349 * @return {Array.<Array.<number|string>>?} Array of diff tuples or null if no
353 function diff_map($text1, $text2) {
354 // Don't run for too long.
355 $ms_end = microtime(true) +
$this->Diff_Timeout
;
357 // Cache the text lengths to prevent multiple calls.
358 $text1_length = mb_strlen($text1);
359 $text2_length = mb_strlen($text2);
360 $max_d = $text1_length +
$text2_length -1;
361 $doubleEnd = $this->Diff_DualThreshold
* 2 < $max_d;
370 $footstep = null; // Used to track overlapping paths.
371 $footsteps = array();
373 // Safari 1.x doesn't have hasOwnProperty
374 //? $hasOwnProperty = !!(footsteps.hasOwnProperty);
375 // If the total number of characters is odd, then the front path will collide
376 // with the reverse path.
377 $front = ($text1_length +
$text2_length) %
2;
378 for ($d = 0; $d < $max_d; $d++
) {
379 // Bail out if timeout reached.
380 if ($this->Diff_Timeout
> 0 && microtime(true) > $ms_end) {
384 // Walk the front path one step.
385 $v_map1[$d] = array ();
386 for ($k = -$d; $k <= $d; $k +
= 2) {
387 if ($k == -$d ||
$k != $d && $v1[$k -1] < $v1[$k +
1]) {
394 $footstep = $x . ',' . $y;
395 if ($front && isset ($footsteps[$footstep])) {
399 $footsteps[$footstep] = $d;
402 while (!$done && ($x < $text1_length) && ($y < $text2_length) && (mb_substr($text1, $x, 1) == mb_substr($text2, $y, 1)) ) {
406 $footstep = $x . ',' . $y;
407 if ($front && isset ($footsteps[$footstep])) {
411 $footsteps[$footstep] = $d;
416 $v_map1[$d][$x . ',' . $y] = true;
417 if ($x == $text1_length && $y == $text2_length) {
418 // Reached the end in single-path mode.
419 return $this->diff_path1($v_map1, $text1, $text2);
422 // Front path ran over reverse path.
424 $v_map2 = array_slice($v_map2, 0, $footsteps[$footstep] +
1);
425 $a = $this->diff_path1($v_map1, mb_substr($text1, 0, $x), mb_substr($text2, 0, $y));
427 return array_merge($a, $this->diff_path2($v_map2, mb_substr($text1, $x), mb_substr($text2, $y)));
432 // Walk the reverse path one step.
433 $v_map2[$d] = array();
434 for ($k = -$d; $k <= $d; $k +
= 2) {
435 if ($k == -$d ||
$k != $d && $v2[$k -1] < $v2[$k +
1]) {
441 $footstep = ($text1_length - $x) . ',' . ($text2_length - $y);
442 if (!$front && isset ($footsteps[$footstep])) {
446 $footsteps[$footstep] = $d;
448 while (!$done && $x < $text1_length && $y < $text2_length && mb_substr($text1, $text1_length - $x -1, 1) == mb_substr($text2, $text2_length - $y -1, 1) ) {
451 $footstep = ($text1_length - $x) . ',' . ($text2_length - $y);
452 if (!$front && isset ($footsteps[$footstep])) {
456 $footsteps[$footstep] = $d;
460 $v_map2[$d][$x . ',' . $y] = true;
462 // Reverse path ran over front path.
463 $v_map1 = array_slice($v_map1, 0, $footsteps[$footstep] +
1);
464 $a = $this->diff_path1($v_map1, mb_substr($text1, 0, $text1_length - $x), mb_substr($text2, 0, $text2_length - $y));
465 return array_merge($a, $this->diff_path2($v_map2, mb_substr($text1, $text1_length - $x), mb_substr($text2, $text2_length - $y)));
470 // Number of diffs equals number of characters, no commonality at all.
475 * Work from the middle back to the start to determine the path.
476 * @param {Array.<Object>} v_map Array of paths.ers
477 * @param {string} text1 Old string fragment to be diffed.
478 * @param {string} text2 New string fragment to be diffed.
479 * @return {Array.<Array.<number|string>>} Array of diff tuples.
482 function diff_path1($v_map, $text1, $text2) {
484 $x = mb_strlen($text1);
485 $y = mb_strlen($text2);
486 /** @type {number?} */
488 for ($d = count($v_map) - 2; $d >= 0; $d--) {
490 if (isset ($v_map[$d][($x -1) . ',' . $y])) {
492 if ($last_op === DIFF_DELETE
) {
493 $path[0][1] = mb_substr($text1, $x, 1) . $path[0][1];
495 array_unshift($path, array (
497 mb_substr($text1, $x, 1)
500 $last_op = DIFF_DELETE
;
502 } elseif (isset ($v_map[$d][$x . ',' . ($y -1)])) {
504 if ($last_op === DIFF_INSERT
) {
505 $path[0][1] = mb_substr($text2, $y, 1) . $path[0][1];
507 array_unshift($path, array (
509 mb_substr($text2, $y, 1)
512 $last_op = DIFF_INSERT
;
517 //if (text1.charAt(x) != text2.charAt(y)) {
518 // throw new Error('No diagonal. Can\'t happen. (diff_path1)');
520 if ($last_op === DIFF_EQUAL
) {
521 $path[0][1] = mb_substr($text1, $x, 1) . $path[0][1];
523 array_unshift($path, array (
525 mb_substr($text1, $x, 1)
528 $last_op = DIFF_EQUAL
;
536 * Work from the middle back to the end to determine the path.
537 * @param {Array.<Object>} v_map Array of paths.
538 * @param {string} text1 Old string fragment to be diffed.
539 * @param {string} text2 New string fragment to be diffed.
540 * @return {Array.<Array.<number|string>>} Array of diff tuples.
543 function diff_path2($v_map, $text1, $text2) {
546 $x = mb_strlen($text1);
547 $y = mb_strlen($text2);
548 /** @type {number?} */
550 for ($d = count($v_map) - 2; $d >= 0; $d--) {
552 if (isset ($v_map[$d][($x -1) . ',' . $y])) {
554 if ($last_op === DIFF_DELETE
) {
555 $path[$pathLength -1][1] .= $text1[mb_strlen($text1) - $x -1];
557 $path[$pathLength++
] = array (
559 $text1[mb_strlen($text1) - $x -1]
562 $last_op = DIFF_DELETE
;
565 elseif (isset ($v_map[$d][$x . ',' . ($y -1)])) {
567 if ($last_op === DIFF_INSERT
) {
568 $path[$pathLength -1][1] .= $text2[mb_strlen($text2) - $y -1];
570 $path[$pathLength++
] = array (
572 $text2[mb_strlen($text2) - $y -1]
575 $last_op = DIFF_INSERT
;
580 //if (text1.charAt(text1.length - x - 1) !=
581 // text2.charAt(text2.length - y - 1)) {
582 // throw new Error('No diagonal. Can\'t happen. (diff_path2)');
584 if ($last_op === DIFF_EQUAL
) {
585 $path[$pathLength -1][1] .= $text1[mb_strlen($text1) - $x -1];
587 $path[$pathLength++
] = array (
589 $text1[mb_strlen($text1) - $x -1]
592 $last_op = DIFF_EQUAL
;
600 * Determine the common prefix of two strings
601 * @param {string} text1 First string.
602 * @param {string} text2 Second string.
603 * @return {number} The number of characters common to the start of each
606 function diff_commonPrefix($text1, $text2) {
607 for ($i = 0; 1; $i++
) {
608 $t1 = mb_substr($text1, $i, 1);
609 $t2 = mb_substr($text2, $i, 1);
610 if($t1==='' ||
$t2==='' ||
$t1 !== $t2 ){
617 * Determine the common suffix of two strings
618 * @param {string} text1 First string.
619 * @param {string} text2 Second string.
620 * @return {number} The number of characters common to the end of each string.
622 function diff_commonSuffix($text1, $text2) {
623 return $this->diff_commonPrefix( strrev($text1), strrev($text2) );
627 * Do the two texts share a mb_substring which is at least half the length of the
629 * @param {string} text1 First string.
630 * @param {string} text2 Second string.
631 * @return {Array.<string>?} Five element Array, containing the prefix of
632 * text1, the suffix of text1, the prefix of text2, the suffix of
633 * text2 and the common middle. Or null if there was no match.
635 function diff_halfMatch($text1, $text2) {
636 $longtext = mb_strlen($text1) > mb_strlen($text2) ?
$text1 : $text2;
637 $shorttext = mb_strlen($text1) > mb_strlen($text2) ?
$text2 : $text1;
638 if (mb_strlen($longtext) < 10 ||
mb_strlen($shorttext) < 1) {
639 return null; // Pointless.
642 // First check if the second quarter is the seed for a half-match.
643 $hm1 = $this->diff_halfMatchI($longtext, $shorttext, ceil(mb_strlen($longtext) / 4));
644 // Check again based on the third quarter.
645 $hm2 = $this->diff_halfMatchI($longtext, $shorttext, ceil(mb_strlen($longtext) / 2));
647 if (!$hm1 && !$hm2) {
654 // Both matched. Select the longest.
655 $hm = mb_strlen($hm1[4]) > mb_strlen($hm2[4]) ?
$hm1 : $hm2;
658 // A half-match was found, sort out the return data.
659 if (mb_strlen($text1) > mb_strlen($text2)) {
670 $mid_common = $hm[4];
671 return array( $text1_a, $text1_b, $text2_a, $text2_b, $mid_common );
675 * Does a mb_substring of shorttext exist within longtext such that the mb_substring
676 * is at least half the length of longtext?
677 * Closure, but does not reference any external variables.
678 * @param {string} longtext Longer string.
679 * @param {string} shorttext Shorter string.
680 * @param {number} i Start index of quarter length mb_substring within longtext
681 * @return {Array.<string>?} Five element Array, containing the prefix of
682 * longtext, the suffix of longtext, the prefix of shorttext, the suffix
683 * of shorttext and the common middle. Or null if there was no match.
686 function diff_halfMatchI($longtext, $shorttext, $i) {
687 // Start with a 1/4 length mb_substring at position i as a seed.
688 $seed = mb_substr($longtext, $i, floor(mb_strlen($longtext) / 4));
692 $best_longtext_a = null;
693 $best_longtext_b = null;
694 $best_shorttext_a = null;
695 $best_shorttext_b = null;
696 while ( ($j = mb_strpos($shorttext, $seed, $j +
1)) !== false ) {
697 $prefixLength = $this->diff_commonPrefix(mb_substr($longtext, $i), mb_substr($shorttext, $j));
698 $suffixLength = $this->diff_commonSuffix(mb_substr($longtext, 0, $i), mb_substr($shorttext, 0, $j));
699 if (mb_strlen($best_common) < $suffixLength +
$prefixLength) {
700 $best_common = mb_substr($shorttext, $j - $suffixLength, $suffixLength) . mb_substr($shorttext, $j, $prefixLength);
701 $best_longtext_a = mb_substr($longtext, 0, $i - $suffixLength);
702 $best_longtext_b = mb_substr($longtext, $i +
$prefixLength);
703 $best_shorttext_a = mb_substr($shorttext, 0, $j - $suffixLength);
704 $best_shorttext_b = mb_substr($shorttext, $j +
$prefixLength);
707 if (mb_strlen($best_common) >= mb_strlen($longtext) / 2) {
721 * Reduce the number of edits by eliminating semantically trivial equalities.
722 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
724 function diff_cleanupSemantic(&$diffs) {
726 $equalities = array (); // Stack of indices where equalities are found.
727 $equalitiesLength = 0; // Keeping our own length var is faster in JS.
728 $lastequality = null; // Always equal to equalities[equalitiesLength-1][1]
729 $pointer = 0; // Index of current position.
730 // Number of characters that changed prior to the equality.
731 $length_changes1 = 0;
732 // Number of characters that changed after the equality.
733 $length_changes2 = 0;
734 while ($pointer < count($diffs)) {
735 if ($diffs[$pointer][0] == DIFF_EQUAL
) { // equality found
736 $equalities[$equalitiesLength++
] = $pointer;
737 $length_changes1 = $length_changes2;
738 $length_changes2 = 0;
739 $lastequality = $diffs[$pointer][1];
740 } else { // an insertion or deletion
741 $length_changes2 +
= mb_strlen($diffs[$pointer][1]);
742 if ($lastequality !== null && (mb_strlen($lastequality) <= $length_changes1) && (mb_strlen($lastequality) <= $length_changes2)) {
744 $zzz_diffs = array_splice($diffs, $equalities[$equalitiesLength -1], 0, array(array (
748 // Change second copy to insert.
749 $diffs[$equalities[$equalitiesLength -1] +
1][0] = DIFF_INSERT
;
750 // Throw away the equality we just deleted.
752 // Throw away the previous equality (it needs to be reevaluated).
754 $pointer = $equalitiesLength > 0 ?
$equalities[$equalitiesLength -1] : -1;
755 $length_changes1 = 0; // Reset the counters.
756 $length_changes2 = 0;
757 $lastequality = null;
764 $this->diff_cleanupMerge($diffs);
766 $this->diff_cleanupSemanticLossless($diffs);
770 * Look for single edits surrounded on both sides by equalities
771 * which can be shifted sideways to align the edit to a word boundary.
772 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
773 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
775 function diff_cleanupSemanticLossless(&$diffs) {
778 // Intentionally ignore the first and last element (don't need checking).
779 while ($pointer < count($diffs) - 1) {
780 if ($diffs[$pointer -1][0] == DIFF_EQUAL
&& $diffs[$pointer +
1][0] == DIFF_EQUAL
) {
781 // This is a single edit surrounded by equalities.
782 $equality1 = $diffs[$pointer -1][1];
783 $edit = $diffs[$pointer][1];
784 $equality2 = $diffs[$pointer +
1][1];
786 // First, shift the edit as far left as possible.
787 $commonOffset = $this->diff_commonSuffix($equality1, $edit);
788 if ($commonOffset !== '') {
789 $commonString = mb_substr($edit, mb_strlen($edit) - $commonOffset);
790 $equality1 = mb_substr($equality1, 0, mb_strlen($equality1) - $commonOffset);
791 $edit = $commonString . mb_substr($edit, 0, mb_strlen($edit) - $commonOffset);
792 $equality2 = $commonString . $equality2;
795 // Second, step character by character right, looking for the best fit.
796 $bestEquality1 = $equality1;
798 $bestEquality2 = $equality2;
799 $bestScore = $this->diff_cleanupSemanticScore($equality1, $edit) +
$this->diff_cleanupSemanticScore($edit, $equality2);
800 while (isset($equality2[0]) && $edit[0] === $equality2[0]) {
801 $equality1 .= $edit[0];
802 $edit = mb_substr($edit, 1) . $equality2[0];
803 $equality2 = mb_substr($equality2, 1);
804 $score = $this->diff_cleanupSemanticScore($equality1, $edit) +
$this->diff_cleanupSemanticScore($edit, $equality2);
805 // The >= encourages trailing rather than leading whitespace on edits.
806 if ($score >= $bestScore) {
808 $bestEquality1 = $equality1;
810 $bestEquality2 = $equality2;
814 if ($diffs[$pointer -1][1] != $bestEquality1) {
815 // We have an improvement, save it back to the diff.
816 if ($bestEquality1) {
817 $diffs[$pointer -1][1] = $bestEquality1;
819 $zzz_diffs = array_splice($diffs, $pointer -1, 1);
822 $diffs[$pointer][1] = $bestEdit;
823 if ($bestEquality2) {
824 $diffs[$pointer +
1][1] = $bestEquality2;
826 $zzz_diffs = array_splice($diffs, $pointer +
1, 1);
836 * Given two strings, compute a score representing whether the internal
837 * boundary falls on logical boundaries.
838 * Scores range from 5 (best) to 0 (worst).
839 * Closure, makes reference to regex patterns defined above.
840 * @param {string} one First string
841 * @param {string} two Second string
842 * @return {number} The score.
844 function diff_cleanupSemanticScore($one, $two) {
845 // Define some regex patterns for matching boundaries.
846 $punctuation = '/[^a-zA-Z0-9]/';
847 $whitespace = '/\s/';
848 $linebreak = '/[\r\n]/';
849 $blanklineEnd = '/\n\r?\n$/';
850 $blanklineStart = '/^\r?\n\r?\n/';
852 if (!$one ||
!$two) {
853 // Edges are the best.
857 // Each port of this function behaves slightly differently due to
858 // subtle differences in each language's definition of things like
859 // 'whitespace'. Since this function's purpose is largely cosmetic,
860 // the choice has been made to use each language's native features
861 // rather than force total conformity.
863 // One point for non-alphanumeric.
864 if (preg_match($punctuation, $one[mb_strlen($one) - 1]) ||
preg_match($punctuation, $two[0])) {
866 // Two points for whitespace.
867 if (preg_match($whitespace, $one[mb_strlen($one) - 1] ) ||
preg_match($whitespace, $two[0])) {
869 // Three points for line breaks.
870 if (preg_match($linebreak, $one[mb_strlen($one) - 1]) ||
preg_match($linebreak, $two[0])) {
872 // Four points for blank lines.
873 if (preg_match($blanklineEnd, $one) ||
preg_match($blanklineStart, $two)) {
883 * Reduce the number of edits by eliminating operationally trivial equalities.
884 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
886 function diff_cleanupEfficiency(&$diffs) {
888 $equalities = array (); // Stack of indices where equalities are found.
889 $equalitiesLength = 0; // Keeping our own length var is faster in JS.
890 $lastequality = ''; // Always equal to equalities[equalitiesLength-1][1]
891 $pointer = 0; // Index of current position.
892 // Is there an insertion operation before the last equality.
894 // Is there a deletion operation before the last equality.
896 // Is there an insertion operation after the last equality.
898 // Is there a deletion operation after the last equality.
900 while ($pointer < count($diffs)) {
901 if ($diffs[$pointer][0] == DIFF_EQUAL
) { // equality found
902 if (mb_strlen($diffs[$pointer][1]) < $this->Diff_EditCost
&& ($post_ins ||
$post_del)) {
904 $equalities[$equalitiesLength++
] = $pointer;
905 $pre_ins = $post_ins;
906 $pre_del = $post_del;
907 $lastequality = $diffs[$pointer][1];
909 // Not a candidate, and can never become one.
910 $equalitiesLength = 0;
913 $post_ins = $post_del = false;
914 } else { // an insertion or deletion
915 if ($diffs[$pointer][0] == DIFF_DELETE
) {
921 * Five types to be split:
922 * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
923 * <ins>A</ins>X<ins>C</ins><del>D</del>
924 * <ins>A</ins><del>B</del>X<ins>C</ins>
925 * <ins>A</del>X<ins>C</ins><del>D</del>
926 * <ins>A</ins><del>B</del>X<del>C</del>
928 if ($lastequality && (($pre_ins && $pre_del && $post_ins && $post_del) ||
((mb_strlen($lastequality) < $this->Diff_EditCost
/ 2) && ($pre_ins +
$pre_del +
$post_ins +
$post_del) == 3))) {
930 $zzz_diffs = array_splice($diffs, $equalities[$equalitiesLength -1], 0, array(array (
934 // Change second copy to insert.
935 $diffs[$equalities[$equalitiesLength -1] +
1][0] = DIFF_INSERT
;
936 $equalitiesLength--; // Throw away the equality we just deleted;
938 if ($pre_ins && $pre_del) {
939 // No changes made which could affect previous entry, keep going.
940 $post_ins = $post_del = true;
941 $equalitiesLength = 0;
943 $equalitiesLength--; // Throw away the previous equality;
944 $pointer = $equalitiesLength > 0 ?
$equalities[$equalitiesLength -1] : -1;
945 $post_ins = $post_del = false;
954 $this->diff_cleanupMerge($diffs);
959 * Reorder and merge like edit sections. Merge equalities.
960 * Any edit section can move as long as it doesn't cross an equality.
961 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
963 function diff_cleanupMerge(&$diffs) {
964 array_push($diffs, array ( DIFF_EQUAL
, '' )); // Add a dummy entry at the end.
970 $commonlength = null;
971 while ($pointer < count($diffs)) {
972 switch ($diffs[$pointer][0]) {
975 $text_insert .= $diffs[$pointer][1];
980 $text_delete .= $diffs[$pointer][1];
984 // Upon reaching an equality, check for prior redundancies.
985 if ($count_delete !== 0 ||
$count_insert !== 0) {
986 if ($count_delete !== 0 && $count_insert !== 0) {
987 // Factor out any common prefixies.
988 $commonlength = $this->diff_commonPrefix($text_insert, $text_delete);
989 if ($commonlength !== 0) {
990 if (($pointer - $count_delete - $count_insert) > 0 && $diffs[$pointer - $count_delete - $count_insert -1][0] == DIFF_EQUAL
) {
991 $diffs[$pointer - $count_delete - $count_insert -1][1] .= mb_substr($text_insert, 0, $commonlength);
993 array_splice($diffs, 0, 0, array(array (
995 mb_substr($text_insert, 0, $commonlength)
999 $text_insert = mb_substr($text_insert, $commonlength);
1000 $text_delete = mb_substr($text_delete, $commonlength);
1002 // Factor out any common suffixies.
1003 $commonlength = $this->diff_commonSuffix($text_insert, $text_delete);
1004 if ($commonlength !== 0) {
1005 $diffs[$pointer][1] = mb_substr($text_insert, mb_strlen($text_insert) - $commonlength) . $diffs[$pointer][1];
1006 $text_insert = mb_substr($text_insert, 0, mb_strlen($text_insert) - $commonlength);
1007 $text_delete = mb_substr($text_delete, 0, mb_strlen($text_delete) - $commonlength);
1010 // Delete the offending records and add the merged ones.
1011 if ($count_delete === 0) {
1012 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+
$count_insert, array(array(
1016 } elseif ($count_insert === 0) {
1017 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+
$count_insert, array(array(
1022 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+
$count_insert, array(array(
1030 $pointer = $pointer - $count_delete - $count_insert +
($count_delete ?
1 : 0) +
($count_insert ?
1 : 0) +
1;
1031 } elseif ($pointer !== 0 && $diffs[$pointer -1][0] == DIFF_EQUAL
) {
1032 // Merge this equality with the previous one.
1033 $diffs[$pointer -1][1] .= $diffs[$pointer][1];
1034 array_splice($diffs, $pointer, 1);
1045 if ($diffs[count($diffs) - 1][1] === '') {
1046 array_pop($diffs); // Remove the dummy entry at the end.
1049 // Second pass: look for single edits surrounded on both sides by equalities
1050 // which can be shifted sideways to eliminate an equality.
1051 // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1054 // Intentionally ignore the first and last element (don't need checking).
1055 while ($pointer < count($diffs) - 1) {
1056 if ($diffs[$pointer-1][0] == DIFF_EQUAL
&& $diffs[$pointer+
1][0] == DIFF_EQUAL
) {
1057 // This is a single edit surrounded by equalities.
1058 if ( mb_substr($diffs[$pointer][1], mb_strlen($diffs[$pointer][1]) - mb_strlen($diffs[$pointer -1][1])) == $diffs[$pointer -1][1]) {
1059 // Shift the edit over the previous equality.
1060 $diffs[$pointer][1] = $diffs[$pointer -1][1] . mb_substr($diffs[$pointer][1], 0, mb_strlen($diffs[$pointer][1]) - mb_strlen($diffs[$pointer -1][1]));
1061 $diffs[$pointer +
1][1] = $diffs[$pointer -1][1] . $diffs[$pointer +
1][1];
1062 array_splice($diffs, $pointer -1, 1);
1064 } elseif (mb_substr($diffs[$pointer][1], 0, mb_strlen($diffs[$pointer +
1][1])) == $diffs[$pointer +
1][1]) {
1065 // Shift the edit over the next equality.
1066 $diffs[$pointer -1][1] .= $diffs[$pointer +
1][1];
1068 $diffs[$pointer][1] = mb_substr($diffs[$pointer][1], mb_strlen($diffs[$pointer +
1][1])) . $diffs[$pointer +
1][1];
1069 array_splice($diffs, $pointer +
1, 1);
1075 // If shifts were made, the diff needs reordering and another shift sweep.
1077 $this->diff_cleanupMerge($diffs);
1082 * loc is a location in text1, compute and return the equivalent location in
1084 * e.g. 'The cat' vs 'The big cat', 1->1, 5->8
1085 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1086 * @param {number} loc Location within text1.
1087 * @return {number} Location within text2.
1089 function diff_xIndex($diffs, $loc) {
1094 for ($x = 0; $x < count($diffs); $x++
) {
1095 if ($diffs[$x][0] !== DIFF_INSERT
) { // Equality or deletion.
1096 $chars1 +
= mb_strlen($diffs[$x][1]);
1098 if ($diffs[$x][0] !== DIFF_DELETE
) { // Equality or insertion.
1099 $chars2 +
= mb_strlen($diffs[$x][1]);
1101 if ($chars1 > $loc) { // Overshot the location.
1104 $last_chars1 = $chars1;
1105 $last_chars2 = $chars2;
1107 // Was the location was deleted?
1108 if (count($diffs) != $x && $diffs[$x][0] === DIFF_DELETE
) {
1109 return $last_chars2;
1111 // Add the remaining character length.
1112 return $last_chars2 +
($loc - $last_chars1);
1116 * Convert a diff array into a pretty HTML report.
1117 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1118 * @return {string} HTML representation.
1120 function diff_prettyHtml($diffs) {
1123 for ($x = 0; $x < count($diffs); $x++
) {
1124 $op = $diffs[$x][0]; // Operation (insert, delete, equal)
1125 $data = $diffs[$x][1]; // Text of change.
1126 $text = preg_replace(array (
1140 $html[$x] = '<INS STYLE="background:#E6FFE6;" TITLE="i=' . $i . '">' . $text . '</INS>';
1143 $html[$x] = '<DEL STYLE="background:#FFE6E6;" TITLE="i=' . $i . '">' . $text . '</DEL>';
1146 $html[$x] = '<SPAN TITLE="i=' . $i . '">' . $text . '</SPAN>';
1149 if ($op !== DIFF_DELETE
) {
1150 $i +
= mb_strlen($data);
1153 return implode('',$html);
1157 * Compute and return the source text (all equalities and deletions).
1158 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1159 * @return {string} Source text.
1161 function diff_text1($diffs) {
1163 for ($x = 0; $x < count($diffs); $x++
) {
1164 if ($diffs[$x][0] !== DIFF_INSERT
) {
1165 $text[$x] = $diffs[$x][1];
1168 return implode('',$text);
1172 * Compute and return the destination text (all equalities and insertions).
1173 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1174 * @return {string} Destination text.
1176 function diff_text2($diffs) {
1178 for ($x = 0; $x < count($diffs); $x++
) {
1179 if ($diffs[$x][0] !== DIFF_DELETE
) {
1180 $text[$x] = $diffs[$x][1];
1183 return implode('',$text);
1187 * Compute the Levenshtein distance; the number of inserted, deleted or
1188 * substituted characters.
1189 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1190 * @return {number} Number of changes.
1192 function diff_levenshtein($diffs) {
1196 for ($x = 0; $x < count($diffs); $x++
) {
1197 $op = $diffs[$x][0];
1198 $data = $diffs[$x][1];
1201 $insertions +
= mb_strlen($data);
1204 $deletions +
= mb_strlen($data);
1207 // A deletion and an insertion is one substitution.
1208 $levenshtein +
= max($insertions, $deletions);
1214 $levenshtein +
= max($insertions, $deletions);
1215 return $levenshtein;
1219 * Crush the diff into an encoded string which describes the operations
1220 * required to transform text1 into text2.
1221 * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1222 * Operations are tab-separated. Inserted text is escaped using %xx notation.
1223 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1224 * @return {string} Delta text.
1226 function diff_toDelta($diffs) {
1228 for ($x = 0; $x < count($diffs); $x++
) {
1229 switch ($diffs[$x][0]) {
1231 $text[$x] = '+' .encodeURI($diffs[$x][1]);
1234 $text[$x] = '-' .mb_strlen($diffs[$x][1]);
1237 $text[$x] = '=' .mb_strlen($diffs[$x][1]);
1241 return str_replace('%20', ' ', implode("\t", $text));
1245 * Given the original text1, and an encoded string which describes the
1246 * operations required to transform text1 into text2, compute the full diff.
1247 * @param {string} text1 Source string for the diff.
1248 * @param {string} delta Delta text.
1249 * @return {Array.<Array.<number|string>>} Array of diff tuples.
1250 * @throws {Error} If invalid input.
1252 function diff_fromDelta($text1, $delta) {
1254 $diffsLength = 0; // Keeping our own length var is faster in JS.
1255 $pointer = 0; // Cursor in text1
1256 $tokens = preg_split("/\t/", $delta);
1258 for ($x = 0; $x < count($tokens); $x++
) {
1259 // Each token begins with a one character parameter which specifies the
1260 // operation of this token (delete, insert, equality).
1261 $param = mb_substr($tokens[$x], 1);
1262 switch ($tokens[$x][0]) {
1265 $diffs[$diffsLength++
] = array (
1269 } catch (Exception
$ex) {
1270 echo_Exception('Illegal escape in diff_fromDelta: ' . $param);
1271 // Malformed URI sequence.
1279 echo_Exception('Invalid number in diff_fromDelta: ' . $param);
1281 $text = mb_substr($text1, $pointer, $n);
1283 if ($tokens[$x][0] == '=') {
1284 $diffs[$diffsLength++
] = array (
1289 $diffs[$diffsLength++
] = array (
1296 // Blank tokens are ok (from a trailing \t).
1297 // Anything else is an error.
1299 echo_Exception('Invalid diff operation in diff_fromDelta: ' . $tokens[$x]);
1303 if ($pointer != mb_strlen($text1)) {
1304 // throw new Exception('Delta length (' . $pointer . ') does not equal source text length (' . mb_strlen($text1) . ').');
1305 echo_Exception('Delta length (' . $pointer . ') does not equal source text length (' . mb_strlen($text1) . ').');
1313 * Locate the best instance of 'pattern' in 'text' near 'loc'.
1314 * @param {string} text The text to search.
1315 * @param {string} pattern The pattern to search for.
1316 * @param {number} loc The location to search around.
1317 * @return {number} Best match index or -1.
1319 function match_main($text, $pattern, $loc) {
1320 $loc = max(0, min($loc, mb_strlen($text)));
1321 if ($text == $pattern) {
1322 // Shortcut (potentially not guaranteed by the algorithm)
1325 elseif (!mb_strlen($text)) {
1326 // Nothing to match.
1329 elseif (mb_substr($text, $loc, mb_strlen($pattern)) == $pattern) {
1330 // Perfect match at the perfect spot! (Includes case of null pattern)
1333 // Do a fuzzy compare.
1334 return $this->match_bitap($text, $pattern, $loc);
1339 * Locate the best instance of 'pattern' in 'text' near 'loc' using the
1341 * @param {string} text The text to search.
1342 * @param {string} pattern The pattern to search for.
1343 * @param {number} loc The location to search around.
1344 * @return {number} Best match index or -1.
1347 function match_bitap($text, $pattern, $loc) {
1348 if (mb_strlen($pattern) > Match_MaxBits
) {
1349 echo_Exception('Pattern too long for this browser.');
1352 // Initialise the alphabet.
1353 $s = $this->match_alphabet($pattern);
1355 // Highest score beyond which we give up.
1356 $score_threshold = $this->Match_Threshold
;
1358 // Is there a nearby exact match? (speedup)
1359 $best_loc = mb_strpos($text, $pattern, $loc);
1360 if ($best_loc !== false) {
1361 $score_threshold = min($this->match_bitapScore(0, $best_loc, $pattern, $loc), $score_threshold);
1364 // What about in the other direction? (speedup)
1365 $best_loc = mb_strrpos( $text, $pattern, min($loc +
mb_strlen($pattern), mb_strlen($text)) );
1366 if ($best_loc !== false) {
1367 $score_threshold = min($this->match_bitapScore(0, $best_loc, $pattern, $loc), $score_threshold);
1370 // Initialise the bit arrays.
1371 $matchmask = 1 << (mb_strlen($pattern) - 1);
1376 $bin_max = mb_strlen($pattern) +
mb_strlen($text);
1378 for ($d = 0; $d < mb_strlen($pattern); $d++
) {
1379 // Scan for the best match; each iteration allows for one more error.
1380 // Run a binary search to determine how far from 'loc' we can stray at this
1383 $bin_mid = $bin_max;
1384 while ($bin_min < $bin_mid) {
1385 if ($this->match_bitapScore($d, $loc +
$bin_mid, $pattern, $loc) <= $score_threshold) {
1386 $bin_min = $bin_mid;
1388 $bin_max = $bin_mid;
1390 $bin_mid = floor(($bin_max - $bin_min) / 2 +
$bin_min);
1392 // Use the result from this iteration as the maximum for the next.
1393 $bin_max = $bin_mid;
1394 $start = max(1, $loc - $bin_mid +
1);
1395 $finish = min($loc +
$bin_mid, mb_strlen($text)) +
mb_strlen($pattern);
1400 $rd[$finish +
1] = (1 << $d) - 1;
1401 for ($j = $finish; $j >= $start; $j--) {
1402 // The alphabet (s) is a sparse hash, so the following line generates
1404 @ $charMatch = $s[ $text[$j -1] ];
1405 if ($d === 0) { // First pass: exact match.
1406 $rd[$j] = (($rd[$j +
1] << 1) |
1) & $charMatch;
1407 } else { // Subsequent passes: fuzzy match.
1408 $rd[$j] = (($rd[$j +
1] << 1) |
1) & $charMatch |
((($last_rd[$j +
1] |
$last_rd[$j]) << 1) |
1) |
$last_rd[$j +
1];
1410 if ($rd[$j] & $matchmask) {
1411 $score = $this->match_bitapScore($d, $j -1, $pattern, $loc);
1412 // This match will almost certainly be better than any existing match.
1413 // But check anyway.
1414 if ($score <= $score_threshold) {
1416 $score_threshold = $score;
1418 if ($best_loc > $loc) {
1419 // When passing loc, don't exceed our current distance from loc.
1420 $start = max(1, 2 * $loc - $best_loc);
1422 // Already passed loc, downhill from here on in.
1428 // No hope for a (better) match at greater error levels.
1429 if ($this->match_bitapScore($d +
1, $loc, $pattern, $loc) > $score_threshold) {
1434 return (int)$best_loc;
1438 * Compute and return the score for a match with e errors and x location.
1439 * Accesses loc and pattern through being a closure.
1440 * @param {number} e Number of errors in match.
1441 * @param {number} x Location of match.
1442 * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
1445 function match_bitapScore($e, $x, $pattern, $loc) {
1446 $accuracy = $e / mb_strlen($pattern);
1447 $proximity = abs($loc - $x);
1448 if (!$this->Match_Distance
) {
1449 // Dodge divide by zero error.
1450 return $proximity ?
1.0 : $accuracy;
1452 return $accuracy +
($proximity / $this->Match_Distance
);
1456 * Initialise the alphabet for the Bitap algorithm.
1457 * @param {string} pattern The text to encode.
1458 * @return {Object} Hash of character locations.
1461 function match_alphabet($pattern) {
1463 for ($i = 0; $i < mb_strlen($pattern); $i++
) {
1464 $s[ $pattern[$i] ] = 0;
1466 for ($i = 0; $i < mb_strlen($pattern); $i++
) {
1467 $s[ $pattern[$i] ] |
= 1 << (mb_strlen($pattern) - $i - 1);
1475 * Increase the context until it is unique,
1476 * but don't let the pattern expand beyond Match_MaxBits.
1477 * @param {patch_obj} patch The patch to grow.
1478 * @param {string} text Source text.
1481 function patch_addContext($patch, $text) {
1482 $pattern = mb_substr($text, $patch->start2
, $patch->length1
);
1483 $previousPattern = null;
1487 ( mb_strlen($pattern) === 0 // Javascript's indexOf/lastIndexOd return 0/strlen respectively if pattern = ''
1488 ||
mb_strpos($text, $pattern) !== mb_strrpos($text, $pattern)
1490 && $pattern !== $previousPattern // avoid infinte loop
1491 && mb_strlen($pattern) < Match_MaxBits
- $this->Patch_Margin
- $this->Patch_Margin
) {
1492 $padding +
= $this->Patch_Margin
;
1493 $previousPattern = $pattern;
1494 $pattern = mb_substr($text, max($patch->start2
- $padding,0), ($patch->start2 +
$patch->length1 +
$padding) - max($patch->start2
- $padding,0) );
1496 // Add one chunk for good luck.
1497 $padding +
= $this->Patch_Margin
;
1499 $prefix = mb_substr($text, max($patch->start2
- $padding,0), $patch->start2
- max($patch->start2
- $padding,0) );
1501 array_unshift($patch->diffs
, array (
1507 $suffix = mb_substr($text, $patch->start2 +
$patch->length1
, ($patch->start2 +
$patch->length1 +
$padding) - ($patch->start2 +
$patch->length1
) );
1509 array_push($patch->diffs
, array (
1515 // Roll back the start points.
1516 $patch->start1
-= mb_strlen($prefix);
1517 $patch->start2
-= mb_strlen($prefix);
1518 // Extend the lengths.
1519 $patch->length1 +
= mb_strlen($prefix) +
mb_strlen($suffix);
1520 $patch->length2 +
= mb_strlen($prefix) +
mb_strlen($suffix);
1524 * Compute a list of patches to turn text1 into text2.
1525 * Use diffs if provided, otherwise compute it ourselves.
1526 * There are four ways to call this function, depending on what data is
1527 * available to the caller:
1529 * a = text1, b = text2
1532 * Method 3 (optimal):
1533 * a = text1, b = diffs
1534 * Method 4 (deprecated, use method 3):
1535 * a = text1, b = text2, c = diffs
1537 * @param {string|Array.<Array.<number|string>>} a text1 (methods 1,3,4) or
1538 * Array of diff tuples for text1 to text2 (method 2).
1539 * @param {string|Array.<Array.<number|string>>} opt_b text2 (methods 1,4) or
1540 * Array of diff tuples for text1 to text2 (method 3) or undefined (method 2).
1541 * @param {string|Array.<Array.<number|string>>} opt_c Array of diff tuples for
1542 * text1 to text2 (method 4) or undefined (methods 1,2,3).
1543 * @return {Array.<patch_obj>} Array of patch objects.
1545 function patch_make($a, $opt_b = null, $opt_c = null ) {
1546 if (is_string($a) && is_string($opt_b) && $opt_c === null ) {
1547 // Method 1: text1, text2
1548 // Compute diffs from text1 and text2.
1550 $diffs = $this->diff_main($text1, $opt_b, true);
1551 if ( count($diffs) > 2) {
1552 $this->diff_cleanupSemantic($diffs);
1553 $this->diff_cleanupEfficiency($diffs);
1555 } elseif ( is_array($a) && $opt_b === null && $opt_c === null) {
1557 // Compute text1 from diffs.
1559 $text1 = $this->diff_text1($diffs);
1560 } elseif ( is_string($a) && is_array($opt_b) && $opt_c === null) {
1561 // Method 3: text1, diffs
1564 } elseif ( is_string($a) && is_string($opt_b) && is_array($opt_c) ) {
1565 // Method 4: text1, text2, diffs
1566 // text2 is not used.
1570 echo_Exception('Unknown call format to patch_make.');
1573 if ( count($diffs) === 0) {
1574 return array(); // Get rid of the null case.
1577 $patch = new patch_obj();
1578 $patchDiffLength = 0; // Keeping our own length var is faster in JS.
1579 $char_count1 = 0; // Number of characters into the text1 string.
1580 $char_count2 = 0; // Number of characters into the text2 string.
1581 // Start with text1 (prepatch_text) and apply the diffs until we arrive at
1582 // text2 (postpatch_text). We recreate the patches one by one to determine
1584 $prepatch_text = $text1;
1585 $postpatch_text = $text1;
1586 for ($x = 0; $x < count($diffs); $x++
) {
1587 $diff_type = $diffs[$x][0];
1588 $diff_text = $diffs[$x][1];
1590 if (!$patchDiffLength && $diff_type !== DIFF_EQUAL
) {
1591 // A new patch starts here.
1592 $patch->start1
= $char_count1;
1593 $patch->start2
= $char_count2;
1596 switch ($diff_type) {
1598 $patch->diffs
[$patchDiffLength++
] = $diffs[$x];
1599 $patch->length2 +
= mb_strlen($diff_text);
1600 $postpatch_text = mb_substr($postpatch_text, 0, $char_count2) . $diff_text . mb_substr($postpatch_text,$char_count2);
1603 $patch->length1 +
= mb_strlen($diff_text);
1604 $patch->diffs
[$patchDiffLength++
] = $diffs[$x];
1605 $postpatch_text = mb_substr($postpatch_text, 0, $char_count2) . mb_substr($postpatch_text, $char_count2 +
mb_strlen($diff_text) );
1608 if ( mb_strlen($diff_text) <= 2 * $this->Patch_Margin
&& $patchDiffLength && count($diffs) != $x +
1) {
1609 // Small equality inside a patch.
1610 $patch->diffs
[$patchDiffLength++
] = $diffs[$x];
1611 $patch->length1 +
= mb_strlen($diff_text);
1612 $patch->length2 +
= mb_strlen($diff_text);
1613 } elseif ( mb_strlen($diff_text) >= 2 * $this->Patch_Margin
) {
1614 // Time for a new patch.
1615 if ($patchDiffLength) {
1616 $this->patch_addContext($patch, $prepatch_text);
1617 array_push($patches,$patch);
1618 $patch = new patch_obj();
1619 $patchDiffLength = 0;
1620 // Unlike Unidiff, our patch lists have a rolling context.
1621 // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1622 // Update prepatch text & pos to reflect the application of the
1623 // just completed patch.
1624 $prepatch_text = $postpatch_text;
1625 $char_count1 = $char_count2;
1631 // Update the current character count.
1632 if ($diff_type !== DIFF_INSERT
) {
1633 $char_count1 +
= mb_strlen($diff_text);
1635 if ($diff_type !== DIFF_DELETE
) {
1636 $char_count2 +
= mb_strlen($diff_text);
1639 // Pick up the leftover patch if not empty.
1640 if ($patchDiffLength) {
1641 $this->patch_addContext($patch, $prepatch_text);
1642 array_push($patches, $patch);
1649 * Given an array of patches, return another array that is identical.
1650 * @param {Array.<patch_obj>} patches Array of patch objects.
1651 * @return {Array.<patch_obj>} Array of patch objects.
1653 function patch_deepCopy($patches) {
1654 // Making deep copies is hard in JavaScript.
1655 $patchesCopy = array();
1656 for ($x = 0; $x < count($patches); $x++
) {
1657 $patch = $patches[$x];
1658 $patchCopy = new patch_obj();
1659 for ($y = 0; $y < count($patch->diffs
); $y++
) {
1660 $patchCopy->diffs
[$y] = $patch->diffs
[$y]; // ?? . slice();
1662 $patchCopy->start1
= $patch->start1
;
1663 $patchCopy->start2
= $patch->start2
;
1664 $patchCopy->length1
= $patch->length1
;
1665 $patchCopy->length2
= $patch->length2
;
1666 $patchesCopy[$x] = $patchCopy;
1668 return $patchesCopy;
1672 * Merge a set of patches onto the text. Return a patched text, as well
1673 * as a list of true/false values indicating which patches were applied.
1674 * @param {Array.<patch_obj>} patches Array of patch objects.
1675 * @param {string} text Old text.
1676 * @return {Array.<string|Array.<boolean>>} Two element Array, containing the
1677 * new text and an array of boolean values.
1679 function patch_apply($patches, $text) {
1680 if ( count($patches) == 0) {
1681 return array($text,array());
1684 // Deep copy the patches so that no changes are made to originals.
1685 $patches = $this->patch_deepCopy($patches);
1687 $nullPadding = $this->patch_addPadding($patches);
1688 $text = $nullPadding . $text . $nullPadding;
1690 $this->patch_splitMax($patches);
1691 // delta keeps track of the offset between the expected and actual location
1692 // of the previous patch. If there are patches expected at positions 10 and
1693 // 20, but the first patch was found at 12, delta is 2 and the second patch
1694 // has an effective expected position of 22.
1697 for ($x = 0; $x < count($patches) ; $x++
) {
1698 $expected_loc = $patches[$x]->start2 +
$delta;
1699 $text1 = $this->diff_text1($patches[$x]->diffs
);
1702 if (mb_strlen($text1) > Match_MaxBits
) {
1703 // patch_splitMax will only provide an oversized pattern in the case of
1704 // a monster delete.
1705 $start_loc = $this->match_main($text, mb_substr($text1, 0, Match_MaxBits
), $expected_loc);
1706 if ($start_loc != -1) {
1707 $end_loc = $this->match_main($text, mb_substr($text1,mb_strlen($text1) - Match_MaxBits
), $expected_loc +
mb_strlen($text1) - Match_MaxBits
);
1708 if ($end_loc == -1 ||
$start_loc >= $end_loc) {
1709 // Can't find valid trailing context. Drop this patch.
1714 $start_loc = $this->match_main($text, $text1, $expected_loc);
1716 if ($start_loc == -1) {
1717 // No match found. :(
1718 $results[$x] = false;
1719 // Subtract the delta for this failed patch from subsequent patches.
1720 $delta -= $patches[$x]->length2
- $patches[$x]->length1
;
1722 // Found a match. :)
1723 $results[$x] = true;
1724 $delta = $start_loc - $expected_loc;
1726 if ($end_loc == -1) {
1727 $text2 = mb_substr($text, $start_loc, mb_strlen($text1) );
1729 $text2 = mb_substr($text, $start_loc, $end_loc + Match_MaxBits
- $start_loc);
1731 if ($text1 == $text2) {
1732 // Perfect match, just shove the replacement text in.
1733 $text = mb_substr($text, 0, $start_loc) . $this->diff_text2($patches[$x]->diffs
) . mb_substr($text,$start_loc +
mb_strlen($text1) );
1735 // Imperfect match. Run a diff to get a framework of equivalent
1737 $diffs = $this->diff_main($text1, $text2, false);
1738 if ( mb_strlen($text1) > Match_MaxBits
&& $this->diff_levenshtein($diffs) / mb_strlen($text1) > $this->Patch_DeleteThreshold
) {
1739 // The end points match, but the content is unacceptably bad.
1740 $results[$x] = false;
1742 $this->diff_cleanupSemanticLossless($diffs);
1745 for ($y = 0; $y < count($patches[$x]->diffs
); $y++
) {
1746 $mod = $patches[$x]->diffs
[$y];
1747 if ($mod[0] !== DIFF_EQUAL
) {
1748 $index2 = $this->diff_xIndex($diffs, $index1);
1750 if ($mod[0] === DIFF_INSERT
) { // Insertion
1751 $text = mb_substr($text, 0, $start_loc +
$index2) . $mod[1] . mb_substr($text, $start_loc +
$index2);
1752 } elseif ($mod[0] === DIFF_DELETE
) { // Deletion
1753 $text = mb_substr($text, 0, $start_loc +
$index2) . mb_substr($text,$start_loc +
$this->diff_xIndex($diffs, $index1 +
mb_strlen($mod[1]) ));
1755 if ($mod[0] !== DIFF_DELETE
) {
1756 $index1 +
= mb_strlen($mod[1]);
1763 // Strip the padding off.
1764 $text = mb_substr($text, mb_strlen($nullPadding), mb_strlen($text) - 2*mb_strlen($nullPadding) );
1765 return array($text, $results);
1769 * Add some padding on text start and end so that edges can match something.
1770 * Intended to be called only from within patch_apply.
1771 * @param {Array.<patch_obj>} patches Array of patch objects.
1772 * @return {string} The padding string added to each side.
1774 function patch_addPadding(&$patches){
1775 $paddingLength = $this->Patch_Margin
;
1777 for ($x = 1; $x <= $paddingLength; $x++
) {
1778 $nullPadding .= mb_chr($x);
1781 // Bump all the patches forward.
1782 for ($x = 0; $x < count($patches); $x++
) {
1783 $patches[$x]->start1 +
= $paddingLength;
1784 $patches[$x]->start2 +
= $paddingLength;
1787 // Add some padding on start of first diff.
1788 $patch = &$patches[0];
1789 $diffs = &$patch->diffs
;
1790 if (count($diffs) == 0 ||
$diffs[0][0] != DIFF_EQUAL
) {
1791 // Add nullPadding equality.
1792 array_unshift($diffs, array(DIFF_EQUAL
, $nullPadding));
1793 $patch->start1
-= $paddingLength; // Should be 0.
1794 $patch->start2
-= $paddingLength; // Should be 0.
1795 $patch->length1 +
= $paddingLength;
1796 $patch->length2 +
= $paddingLength;
1797 } elseif ($paddingLength > mb_strlen($diffs[0][1]) ) {
1798 // Grow first equality.
1799 $extraLength = $paddingLength - mb_strlen($diffs[0][1]);
1800 $diffs[0][1] = mb_substr( $nullPadding , mb_strlen($diffs[0][1]) ) . $diffs[0][1];
1801 $patch->start1
-= $extraLength;
1802 $patch->start2
-= $extraLength;
1803 $patch->length1 +
= $extraLength;
1804 $patch->length2 +
= $extraLength;
1807 // Add some padding on end of last diff.
1808 $patch = &$patches[count($patches) - 1];
1809 $diffs = &$patch->diffs
;
1810 if ( count($diffs) == 0 ||
$diffs[ count($diffs) - 1][0] != DIFF_EQUAL
) {
1811 // Add nullPadding equality.
1812 array_push($diffs, array(DIFF_EQUAL
, $nullPadding) );
1813 $patch->length1 +
= $paddingLength;
1814 $patch->length2 +
= $paddingLength;
1815 } elseif ($paddingLength > mb_strlen( $diffs[count($diffs)-1][1] ) ) {
1816 // Grow last equality.
1817 $extraLength = $paddingLength - mb_strlen( $diffs[count($diffs)-1][1] );
1818 $diffs[ count($diffs)-1][1] .= mb_substr($nullPadding,0,$extraLength);
1819 $patch->length1 +
= $extraLength;
1820 $patch->length2 +
= $extraLength;
1823 return $nullPadding;
1827 * Look through the patches and break up any which are longer than the maximum
1828 * limit of the match algorithm.
1829 * @param {Array.<patch_obj>} patches Array of patch objects.
1831 function patch_splitMax(&$patches) {
1832 for ($x = 0; $x < count($patches); $x++
) {
1833 if ( $patches[$x]->length1
> Match_MaxBits
) {
1834 $bigpatch = $patches[$x];
1835 // Remove the big old patch.
1836 array_splice($patches,$x--,1);
1837 $patch_size = Match_MaxBits
;
1838 $start1 = $bigpatch->start1
;
1839 $start2 = $bigpatch->start2
;
1841 while ( count($bigpatch->diffs
) !== 0) {
1842 // Create one of several smaller patches.
1843 $patch = new patch_obj();
1845 $patch->start1
= $start1 - mb_strlen($precontext);
1846 $patch->start2
= $start2 - mb_strlen($precontext);
1847 if ($precontext !== '') {
1848 $patch->length1
= $patch->length2
= mb_strlen($precontext);
1849 array_push($patch->diffs
, array(DIFF_EQUAL
, $precontext) );
1851 while ( count($bigpatch->diffs
) !== 0 && $patch->length1
< $patch_size - $this->Patch_Margin
) {
1852 $diff_type = $bigpatch->diffs
[0][0];
1853 $diff_text = $bigpatch->diffs
[0][1];
1854 if ($diff_type === DIFF_INSERT
) {
1855 // Insertions are harmless.
1856 $patch->length2 +
= mb_strlen($diff_text);
1857 $start2 +
= mb_strlen($diff_text);
1858 array_push($patch->diffs
, array_shift($bigpatch->diffs
) );
1861 if ($diff_type === DIFF_DELETE
&& count($patch->diffs
) == 1 && $patch->diffs
[0][0] == DIFF_EQUAL
&& (mb_strlen($diff_text) > 2 * $patch_size) ) {
1862 // This is a large deletion. Let it pass in one chunk.
1863 $patch->length1 +
= mb_strlen($diff_text);
1864 $start1 +
= mb_strlen($diff_text);
1866 array_push( $patch->diffs
, array($diff_type, $diff_text) );
1867 array_shift($bigpatch->diffs
);
1869 // Deletion or equality. Only take as much as we can stomach.
1870 $diff_text = mb_substr($diff_text, 0, $patch_size - $patch->length1
- $this->Patch_Margin
);
1871 $patch->length1 +
= mb_strlen($diff_text);
1872 $start1 +
= mb_strlen($diff_text);
1873 if ($diff_type === DIFF_EQUAL
) {
1874 $patch->length2 +
= mb_strlen($diff_text);
1875 $start2 +
= mb_strlen($diff_text);
1879 array_push($patch->diffs
, array($diff_type, $diff_text) );
1880 if ($diff_text == $bigpatch->diffs
[0][1]) {
1881 array_shift($bigpatch->diffs
);
1883 $bigpatch->diffs
[0][1] = mb_substr( $bigpatch->diffs
[0][1],mb_strlen($diff_text) );
1887 // Compute the head context for the next patch.
1888 $precontext = $this->diff_text2($patch->diffs
);
1889 $precontext = mb_substr($precontext, mb_strlen($precontext)-$this->Patch_Margin
);
1890 // Append the end context for this patch.
1891 $postcontext = mb_substr( $this->diff_text1($bigpatch->diffs
), 0, $this->Patch_Margin
);
1892 if ($postcontext !== '') {
1893 $patch->length1 +
= mb_strlen($postcontext);
1894 $patch->length2 +
= mb_strlen($postcontext);
1895 if ( count($patch->diffs
) !== 0 && $patch->diffs
[ count($patch->diffs
) - 1][0] === DIFF_EQUAL
) {
1896 $patch->diffs
[ count($patch->diffs
) - 1][1] .= $postcontext;
1898 array_push($patch->diffs
, array(DIFF_EQUAL
, $postcontext));
1902 array_splice($patches, ++
$x, 0, array($patch));
1910 * Take a list of patches and return a textual representation.
1911 * @param {Array.<patch_obj>} patches Array of patch objects.
1912 * @return {string} Text representation of patches.
1914 function patch_toText($patches) {
1916 for ($x = 0; $x < count($patches) ; $x++
) {
1917 $text[$x] = $patches[$x];
1919 return implode('',$text);
1923 * Parse a textual representation of patches and return a list of patch objects.
1924 * @param {string} textline Text representation of patches.
1925 * @return {Array.<patch_obj>} Array of patch objects.
1926 * @throws {Error} If invalid input.
1928 function patch_fromText($textline) {
1930 if ($textline === '') {
1933 $text = explode("\n",$textline);
1934 foreach($text as $i=>$t){ if($t===''){ unset($text[$i]); } }
1936 while ($textPointer < count($text) ) {
1938 preg_match('/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/',$text[$textPointer],$m);
1940 echo_Exception('Invalid patch string: ' . $text[$textPointer]);
1942 $patch = new patch_obj();
1943 array_push($patches, $patch);
1944 @$patch->start1
= (int)$m[1];
1945 if (@$m[2] === '') {
1947 $patch->length1
= 1;
1948 } elseif ( @$m[2] == '0') {
1949 $patch->length1
= 0;
1952 @$patch->length1
= (int)$m[2];
1955 @$patch->start2
= (int)$m[3];
1956 if (@$m[4] === '') {
1958 $patch->length2
= 1;
1959 } elseif ( @$m[4] == '0') {
1960 $patch->length2
= 0;
1963 @$patch->length2
= (int)$m[4];
1967 while ($textPointer < count($text) ) {
1968 $sign = $text[$textPointer][0];
1970 $line = decodeURI( mb_substr($text[$textPointer],1) );
1971 } catch (Exception
$ex) {
1972 // Malformed URI sequence.
1973 throw new Exception('Illegal escape in patch_fromText: ' . $line);
1977 array_push( $patch->diffs
, array(DIFF_DELETE
, $line) );
1978 } elseif ($sign == '+') {
1980 array_push($patch->diffs
, array(DIFF_INSERT
, $line) );
1981 } elseif ($sign == ' ') {
1983 array_push($patch->diffs
, array(DIFF_EQUAL
, $line) );
1984 } elseif ($sign == '@') {
1985 // Start of next patch.
1987 } elseif ($sign === '') {
1988 // Blank line? Whatever.
1991 echo_Exception('Invalid patch mode "' . $sign . '" in: ' . $line);
2001 * Class representing one patch operation.
2005 /** @type {Array.<Array.<number|string>>} */
2006 public $diffs = array();
2007 /** @type {number?} */
2008 public $start1 = null;
2009 /** @type {number?} */
2010 public $start2 = null;
2011 /** @type {number} */
2012 public $length1 = 0;
2013 /** @type {number} */
2014 public $length2 = 0;
2017 * Emmulate GNU diff's format.
2018 * Header: @@ -382,8 +481,9 @@
2019 * Indicies are printed as 1-based, not 0-based.
2020 * @return {string} The GNU diff string.
2022 function toString() {
2023 if ($this->length1
=== 0) {
2024 $coords1 = $this->start1
. ',0';
2025 } elseif ($this->length1
== 1) {
2026 $coords1 = $this->start1 +
1;
2028 $coords1 = ($this->start1 +
1) . ',' . $this->length1
;
2030 if ($this->length2
=== 0) {
2031 $coords2 = $this->start2
. ',0';
2032 } elseif ($this->length2
== 1) {
2033 $coords2 = $this->start2 +
1;
2035 $coords2 = ($this->start2 +
1) . ',' . $this->length2
;
2037 $text = array ( '@@ -' . $coords1 . ' +' . $coords2 . " @@\n" );
2039 // Escape the body of the patch with %xx notation.
2040 for ($x = 0; $x < count($this->diffs
); $x++
) {
2041 switch ($this->diffs
[$x][0]) {
2052 $text[$x +
1] = $op . encodeURI($this->diffs
[$x][1]) . "\n";
2054 return str_replace('%20', ' ', implode('',$text));
2056 function __toString(){
2057 return $this->toString();
2061 define('DIFF_DELETE', -1);
2062 define('DIFF_INSERT', 1);
2063 define('DIFF_EQUAL', 0);
2065 define('Match_MaxBits', PHP_INT_SIZE
* 8);
2068 function charCodeAt($str, $pos) {
2069 return mb_ord(mb_substr($str, $pos, 1));
2071 function mb_ord($v) {
2072 $k = mb_convert_encoding($v, 'UCS-2LE', 'UTF-8');
2073 $k1 = ord(substr($k, 0, 1));
2074 $k2 = ord(substr($k, 1, 1));
2075 return $k2 * 256 +
$k1;
2077 function mb_chr($num){
2078 return mb_convert_encoding('&#'.intval($num).';', 'UTF-8', 'HTML-ENTITIES');
2082 * as in javascript encodeURI() following the MDN description
2084 * @link https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURI
2088 function encodeURI($url) {
2089 return strtr(rawurlencode($url), array (
2090 '%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@', '%26' => '&', '%3D' => '=',
2091 '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*', '%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#',
2095 function decodeURI($encoded) {
2099 '%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@', '%26' => '&', '%3D' => '=',
2100 '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*', '%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#',
2102 $dontDecode = array();
2103 foreach ($table as $k => $v) {
2104 $dontDecode[$k] = encodeURI($k);
2107 return rawurldecode(strtr($encoded, $dontDecode));
2110 function echo_Exception($str){
2111 global $lastException;
2112 $lastException = $str;
2115 //mb_internal_encoding("UTF-8");