Remove product literal strings in "pht()", part 5
[phabricator.git] / externals / diff_match_patch / diff_match_patch.php
blob35bdb82068d2af55d2079ffad78dd66966b748bf
1 <?php
2 /**
3 * Diff Match and Patch
5 * Copyright 2006 Google Inc.
6 * http://code.google.com/p/google-diff-match-patch/
8 * php port by Tobias Buschor shwups.ch
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
23 /**
24 * @fileoverview Computes the difference between two texts to create a patch.
25 * Applies the patch onto another text, allowing for errors.
26 * @author fraser@google.com (Neil Fraser)
29 /**
30 * Class containing the diff, match and patch methods.
31 * @constructor
33 class diff_match_patch {
35 // Defaults.
36 // Redefine these in your program to override the defaults.
38 // Number of seconds to map a diff before giving up (0 for infinity).
39 public $Diff_Timeout = 1.0;
40 // Cost of an empty edit operation in terms of edit characters.
41 public $Diff_EditCost = 4;
42 // The size beyond which the double-ended diff activates.
43 // Double-ending is twice as fast, but less accurate.
44 public $Diff_DualThreshold = 32;
45 // At what point is no match declared (0.0 = perfection, 1.0 = very loose).
46 public $Match_Threshold = 0.5;
47 // How far to search for a match (0 = exact location, 1000+ = broad match).
48 // A match this many characters away from the expected location will add
49 // 1.0 to the score (0.0 is a perfect match).
50 public $Match_Distance = 1000;
51 // When deleting a large block of text (over ~64 characters), how close does
52 // the contents have to match the expected contents. (0.0 = perfection,
53 // 1.0 = very loose). Note that Match_Threshold controls how closely the
54 // end points of a delete need to match.
55 public $Patch_DeleteThreshold = 0.5;
56 // Chunk size for context length.
57 public $Patch_Margin = 4;
59 /**
60 * Compute the number of bits in an int.
61 * The normal answer for JavaScript is 32.
62 * @return {number} Max bits
64 function getMaxBits() {
65 var maxbits = 0;
66 var oldi = 1;
67 var newi = 2;
68 while (oldi != newi) {
69 maxbits++;
70 oldi = newi;
71 newi = newi << 1;
73 return maxbits;
75 // How many bits in a number?
76 this.Match_MaxBits = getMaxBits();
78 // DIFF FUNCTIONS
80 /**
81 * Find the differences between two texts. Simplifies the problem by stripping
82 * any common prefix or suffix off the texts before diffing.
83 * @param {string} text1 Old string to be diffed.
84 * @param {string} text2 New string to be diffed.
85 * @param {boolean} opt_checklines Optional speedup flag. If present and false,
86 * then don't run a line-level diff first to identify the changed areas.
87 * Defaults to true, which does a faster, slightly less optimal diff
88 * @return {Array.<Array.<number|string>>} Array of diff tuples.
90 function diff_main($text1, $text2, $checklines = true) {
91 // Check for equality (speedup)
92 if ($text1 === $text2) {
93 return array ( array ( DIFF_EQUAL, $text1) );
96 // Trim off common prefix (speedup)
97 $commonlength = $this->diff_commonPrefix($text1, $text2);
98 $commonprefix = mb_substr($text1, 0, $commonlength);
99 $text1 = mb_substr($text1, $commonlength);
100 $text2 = mb_substr($text2, $commonlength);
102 // Trim off common suffix (speedup)
103 $commonlength = $this->diff_commonSuffix($text1, $text2);
104 $commonsuffix = mb_substr($text1, mb_strlen($text1) - $commonlength);
105 $text1 = mb_substr($text1, 0, mb_strlen($text1) - $commonlength);
106 $text2 = mb_substr($text2, 0, mb_strlen($text2) - $commonlength);
108 // Compute the diff on the middle block
109 $diffs = $this->diff_compute($text1, $text2, $checklines);
111 // Restore the prefix and suffix
112 if ($commonprefix !== '') {
113 array_unshift($diffs, array ( DIFF_EQUAL, $commonprefix ));
115 if ($commonsuffix !== '') {
116 array_push($diffs, array ( DIFF_EQUAL, $commonsuffix ));
118 $this->diff_cleanupMerge($diffs);
119 return $diffs;
123 * Find the differences between two texts. Assumes that the texts do not
124 * have any common prefix or suffix.
125 * @param {string} text1 Old string to be diffed.
126 * @param {string} text2 New string to be diffed.
127 * @param {boolean} checklines Speedup flag. If false, then don't run a
128 * line-level diff first to identify the changed areas.
129 * If true, then run a faster, slightly less optimal diff
130 * @return {Array.<Array.<number|string>>} Array of diff tuples.
131 * @private
133 function diff_compute($text1, $text2, $checklines) {
135 if ($text1 === '') {
136 // Just add some text (speedup)
137 return array ( array ( DIFF_INSERT, $text2 ) );
140 if ($text2 === '') {
141 // Just delete some text (speedup)
142 return array ( array ( DIFF_DELETE, $text1 ) );
145 $longtext = mb_strlen($text1) > mb_strlen($text2) ? $text1 : $text2;
146 $shorttext = mb_strlen($text1) > mb_strlen($text2) ? $text2 : $text1;
147 $i = mb_strpos($longtext, $shorttext);
148 if ($i !== false) {
149 // Shorter text is inside the longer text (speedup)
150 $diffs = array (
151 array ( DIFF_INSERT, mb_substr($longtext, 0, $i) ),
152 array ( DIFF_EQUAL, $shorttext ),
153 array ( DIFF_INSERT, mb_substr($longtext, $i +mb_strlen($shorttext)) )
156 // Swap insertions for deletions if diff is reversed.
157 if (mb_strlen($text1) > mb_strlen($text2)) {
158 $diffs[0][0] = $diffs[2][0] = DIFF_DELETE;
160 return $diffs;
162 $longtext = $shorttext = null; // Garbage collect
164 // Check to see if the problem can be split in two.
165 $hm = $this->diff_halfMatch($text1, $text2);
166 if ($hm) {
167 // A half-match was found, sort out the return data.
168 $text1_a = $hm[0];
169 $text1_b = $hm[1];
170 $text2_a = $hm[2];
171 $text2_b = $hm[3];
172 $mid_common = $hm[4];
173 // Send both pairs off for separate processing.
174 $diffs_a = $this->diff_main($text1_a, $text2_a, $checklines);
175 $diffs_b = $this->diff_main($text1_b, $text2_b, $checklines);
176 // Merge the results.
177 return array_merge($diffs_a, array (
178 array (
179 DIFF_EQUAL,
180 $mid_common
182 ), $diffs_b);
185 // Perform a real diff.
186 if ($checklines && (mb_strlen($text1) < 100 || mb_strlen($text2) < 100)) {
187 // Too trivial for the overhead.
188 $checklines = false;
190 $linearray = null;
191 if ($checklines) {
192 // Scan the text on a line-by-line basis first.
193 $a = $this->diff_linesToChars($text1, $text2);
194 $text1 = $a[0];
195 $text2 = $a[1];
196 $linearray = $a[2];
198 $diffs = $this->diff_map($text1, $text2);
199 if (!$diffs) {
200 // No acceptable result.
201 $diffs = array (
202 array (
203 DIFF_DELETE,
204 $text1
206 array (
207 DIFF_INSERT,
208 $text2
212 if ($checklines) {
213 // Convert the diff back to original text.
214 $this->diff_charsToLines($diffs, $linearray);
215 // Eliminate freak matches (e.g. blank lines)
216 $this->diff_cleanupSemantic($diffs);
218 // Rediff any replacement blocks, this time character-by-character.
219 // Add a dummy entry at the end.
220 array_push($diffs, array (
221 DIFF_EQUAL,
224 $pointer = 0;
225 $count_delete = 0;
226 $count_insert = 0;
227 $text_delete = '';
228 $text_insert = '';
229 while ($pointer < count($diffs)) {
230 switch ($diffs[$pointer][0]) {
231 case DIFF_INSERT :
232 $count_insert++;
233 $text_insert .= $diffs[$pointer][1];
234 break;
235 case DIFF_DELETE :
236 $count_delete++;
237 $text_delete .= $diffs[$pointer][1];
238 break;
239 case DIFF_EQUAL :
240 // Upon reaching an equality, check for prior redundancies.
241 if ($count_delete >= 1 && $count_insert >= 1) {
242 // Delete the offending records and add the merged ones.
243 $a = $this->diff_main($text_delete, $text_insert, false);
244 array_splice($diffs, $pointer - $count_delete - $count_insert, $count_delete + $count_insert);
246 $pointer = $pointer - $count_delete - $count_insert;
247 for ($j = count($a) - 1; $j >= 0; $j--) {
248 array_splice($diffs, $pointer, 0, array($a[$j]));
250 $pointer = $pointer +count($a);
252 $count_insert = 0;
253 $count_delete = 0;
254 $text_delete = '';
255 $text_insert = '';
256 break;
258 $pointer++;
260 array_pop($diffs); // Remove the dummy entry at the end.
262 return $diffs;
266 * Split two texts into an array of strings. Reduce the texts to a string of
267 * hashes where each Unicode character represents one line.
268 * @param {string} text1 First string.
269 * @param {string} text2 Second string.
270 * @return {Array.<string|Array.<string>>} Three element Array, containing the
271 * encoded text1, the encoded text2 and the array of unique strings. The
272 * zeroth element of the array of unique strings is intentionally blank.
273 * @private
275 function diff_linesToChars($text1, $text2) {
276 $lineArray = array(); // e.g. lineArray[4] == 'Hello\n'
277 $lineHash = array(); // e.g. lineHash['Hello\n'] == 4
279 // '\x00' is a valid character, but various debuggers don't like it.
280 // So we'll insert a junk entry to avoid generating a null character.
281 $lineArray[0] = '';
283 $chars1 = $this->diff_linesToCharsMunge($text1, $lineArray, $lineHash);
284 $chars2 = $this->diff_linesToCharsMunge($text2, $lineArray, $lineHash);
285 return array (
286 $chars1,
287 $chars2,
288 $lineArray
293 * Split a text into an array of strings. Reduce the texts to a string of
294 * hashes where each Unicode character represents one line.
295 * Modifies linearray and linehash through being a closure.
296 * @param {string} text String to encode
297 * @return {string} Encoded string
298 * @private
300 function diff_linesToCharsMunge($text, &$lineArray, &$lineHash) {
301 $chars = '';
302 // Walk the text, pulling out a mb_substring for each line.
303 // text.split('\n') would would temporarily double our memory footprint.
304 // Modifying text would create many large strings to garbage collect.
305 $lineStart = 0;
306 $lineEnd = -1;
307 // Keeping our own length variable is faster than looking it up.
308 $lineArrayLength = count($lineArray);
309 while ($lineEnd < mb_strlen($text) - 1) {
310 $lineEnd = mb_strpos($text, "\n", $lineStart);
311 if ($lineEnd === false) {
312 $lineEnd = mb_strlen($text) - 1;
314 $line = mb_substr($text, $lineStart, $lineEnd +1 -$lineStart);
315 $lineStart = $lineEnd +1;
317 if ( isset($lineHash[$line]) ) {
318 $chars .= mb_chr($lineHash[$line]);
319 } else {
320 $chars .= mb_chr($lineArrayLength);
321 $lineHash[$line] = $lineArrayLength;
322 $lineArray[$lineArrayLength++] = $line;
325 return $chars;
328 * Rehydrate the text in a diff from a string of line hashes to real lines of
329 * text.
330 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
331 * @param {Array.<string>} lineArray Array of unique strings.
332 * @private
334 function diff_charsToLines(&$diffs, $lineArray) {
335 for ($x = 0; $x < count($diffs); $x++) {
336 $chars = $diffs[$x][1];
337 $text = array ();
338 for ($y = 0; $y < mb_strlen($chars); $y++) {
339 $text[$y] = $lineArray[charCodeAt($chars, $y)];
341 $diffs[$x][1] = implode('',$text);
346 * Explore the intersection points between the two texts.
347 * @param {string} text1 Old string to be diffed.
348 * @param {string} text2 New string to be diffed.
349 * @return {Array.<Array.<number|string>>?} Array of diff tuples or null if no
350 * diff available.
351 * @private
353 function diff_map($text1, $text2) {
354 // Don't run for too long.
355 $ms_end = microtime(true) + $this->Diff_Timeout;
357 // Cache the text lengths to prevent multiple calls.
358 $text1_length = mb_strlen($text1);
359 $text2_length = mb_strlen($text2);
360 $max_d = $text1_length + $text2_length -1;
361 $doubleEnd = $this->Diff_DualThreshold * 2 < $max_d;
362 $v_map1 = array();
363 $v_map2 = array();
364 $v1 = array();
365 $v2 = array();
366 $v1[1] = 0;
367 $v2[1] = 0;
368 $x = null;
369 $y = null;
370 $footstep = null; // Used to track overlapping paths.
371 $footsteps = array();
372 $done = false;
373 // Safari 1.x doesn't have hasOwnProperty
374 //? $hasOwnProperty = !!(footsteps.hasOwnProperty);
375 // If the total number of characters is odd, then the front path will collide
376 // with the reverse path.
377 $front = ($text1_length + $text2_length) % 2;
378 for ($d = 0; $d < $max_d; $d++) {
379 // Bail out if timeout reached.
380 if ($this->Diff_Timeout > 0 && microtime(true) > $ms_end) {
381 return null; // zzz
384 // Walk the front path one step.
385 $v_map1[$d] = array ();
386 for ($k = -$d; $k <= $d; $k += 2) {
387 if ($k == -$d || $k != $d && $v1[$k -1] < $v1[$k +1]) {
388 $x = $v1[$k +1];
389 } else {
390 $x = $v1[$k -1] + 1;
392 $y = $x - $k;
393 if ($doubleEnd) {
394 $footstep = $x . ',' . $y;
395 if ($front && isset ($footsteps[$footstep])) {
396 $done = true;
398 if (!$front) {
399 $footsteps[$footstep] = $d;
402 while (!$done && ($x < $text1_length) && ($y < $text2_length) && (mb_substr($text1, $x, 1) == mb_substr($text2, $y, 1)) ) {
403 $x++;
404 $y++;
405 if ($doubleEnd) {
406 $footstep = $x . ',' . $y;
407 if ($front && isset ($footsteps[$footstep])) {
408 $done = true;
410 if (!$front) {
411 $footsteps[$footstep] = $d;
415 $v1[$k] = $x;
416 $v_map1[$d][$x . ',' . $y] = true;
417 if ($x == $text1_length && $y == $text2_length) {
418 // Reached the end in single-path mode.
419 return $this->diff_path1($v_map1, $text1, $text2);
421 elseif ($done) {
422 // Front path ran over reverse path.
424 $v_map2 = array_slice($v_map2, 0, $footsteps[$footstep] + 1);
425 $a = $this->diff_path1($v_map1, mb_substr($text1, 0, $x), mb_substr($text2, 0, $y));
427 return array_merge($a, $this->diff_path2($v_map2, mb_substr($text1, $x), mb_substr($text2, $y)));
431 if ($doubleEnd) {
432 // Walk the reverse path one step.
433 $v_map2[$d] = array();
434 for ($k = -$d; $k <= $d; $k += 2) {
435 if ($k == -$d || $k != $d && $v2[$k -1] < $v2[$k +1]) {
436 $x = $v2[$k +1];
437 } else {
438 $x = $v2[$k -1] + 1;
440 $y = $x - $k;
441 $footstep = ($text1_length - $x) . ',' . ($text2_length - $y);
442 if (!$front && isset ($footsteps[$footstep])) {
443 $done = true;
445 if ($front) {
446 $footsteps[$footstep] = $d;
448 while (!$done && $x < $text1_length && $y < $text2_length && mb_substr($text1, $text1_length - $x -1, 1) == mb_substr($text2, $text2_length - $y -1, 1) ) {
449 $x++;
450 $y++;
451 $footstep = ($text1_length - $x) . ',' . ($text2_length - $y);
452 if (!$front && isset ($footsteps[$footstep])) {
453 $done = true;
455 if ($front) {
456 $footsteps[$footstep] = $d;
459 $v2[$k] = $x;
460 $v_map2[$d][$x . ',' . $y] = true;
461 if ($done) {
462 // Reverse path ran over front path.
463 $v_map1 = array_slice($v_map1, 0, $footsteps[$footstep] + 1);
464 $a = $this->diff_path1($v_map1, mb_substr($text1, 0, $text1_length - $x), mb_substr($text2, 0, $text2_length - $y));
465 return array_merge($a, $this->diff_path2($v_map2, mb_substr($text1, $text1_length - $x), mb_substr($text2, $text2_length - $y)));
470 // Number of diffs equals number of characters, no commonality at all.
471 return null;
475 * Work from the middle back to the start to determine the path.
476 * @param {Array.<Object>} v_map Array of paths.ers
477 * @param {string} text1 Old string fragment to be diffed.
478 * @param {string} text2 New string fragment to be diffed.
479 * @return {Array.<Array.<number|string>>} Array of diff tuples.
480 * @private
482 function diff_path1($v_map, $text1, $text2) {
483 $path = array ();
484 $x = mb_strlen($text1);
485 $y = mb_strlen($text2);
486 /** @type {number?} */
487 $last_op = null;
488 for ($d = count($v_map) - 2; $d >= 0; $d--) {
489 while (1) {
490 if (isset ($v_map[$d][($x -1) . ',' . $y])) {
491 $x--;
492 if ($last_op === DIFF_DELETE) {
493 $path[0][1] = mb_substr($text1, $x, 1) . $path[0][1];
494 } else {
495 array_unshift($path, array (
496 DIFF_DELETE,
497 mb_substr($text1, $x, 1)
500 $last_op = DIFF_DELETE;
501 break;
502 } elseif (isset ($v_map[$d][$x . ',' . ($y -1)])) {
503 $y--;
504 if ($last_op === DIFF_INSERT) {
505 $path[0][1] = mb_substr($text2, $y, 1) . $path[0][1];
506 } else {
507 array_unshift($path, array (
508 DIFF_INSERT,
509 mb_substr($text2, $y, 1)
512 $last_op = DIFF_INSERT;
513 break;
514 } else {
515 $x--;
516 $y--;
517 //if (text1.charAt(x) != text2.charAt(y)) {
518 // throw new Error('No diagonal. Can\'t happen. (diff_path1)');
520 if ($last_op === DIFF_EQUAL) {
521 $path[0][1] = mb_substr($text1, $x, 1) . $path[0][1];
522 } else {
523 array_unshift($path, array (
524 DIFF_EQUAL,
525 mb_substr($text1, $x, 1)
528 $last_op = DIFF_EQUAL;
532 return $path;
536 * Work from the middle back to the end to determine the path.
537 * @param {Array.<Object>} v_map Array of paths.
538 * @param {string} text1 Old string fragment to be diffed.
539 * @param {string} text2 New string fragment to be diffed.
540 * @return {Array.<Array.<number|string>>} Array of diff tuples.
541 * @private
543 function diff_path2($v_map, $text1, $text2) {
544 $path = array ();
545 $pathLength = 0;
546 $x = mb_strlen($text1);
547 $y = mb_strlen($text2);
548 /** @type {number?} */
549 $last_op = null;
550 for ($d = count($v_map) - 2; $d >= 0; $d--) {
551 while (1) {
552 if (isset ($v_map[$d][($x -1) . ',' . $y])) {
553 $x--;
554 if ($last_op === DIFF_DELETE) {
555 $path[$pathLength -1][1] .= $text1[mb_strlen($text1) - $x -1];
556 } else {
557 $path[$pathLength++] = array (
558 DIFF_DELETE,
559 $text1[mb_strlen($text1) - $x -1]
562 $last_op = DIFF_DELETE;
563 break;
565 elseif (isset ($v_map[$d][$x . ',' . ($y -1)])) {
566 $y--;
567 if ($last_op === DIFF_INSERT) {
568 $path[$pathLength -1][1] .= $text2[mb_strlen($text2) - $y -1];
569 } else {
570 $path[$pathLength++] = array (
571 DIFF_INSERT,
572 $text2[mb_strlen($text2) - $y -1]
575 $last_op = DIFF_INSERT;
576 break;
577 } else {
578 $x--;
579 $y--;
580 //if (text1.charAt(text1.length - x - 1) !=
581 // text2.charAt(text2.length - y - 1)) {
582 // throw new Error('No diagonal. Can\'t happen. (diff_path2)');
584 if ($last_op === DIFF_EQUAL) {
585 $path[$pathLength -1][1] .= $text1[mb_strlen($text1) - $x -1];
586 } else {
587 $path[$pathLength++] = array (
588 DIFF_EQUAL,
589 $text1[mb_strlen($text1) - $x -1]
592 $last_op = DIFF_EQUAL;
596 return $path;
600 * Determine the common prefix of two strings
601 * @param {string} text1 First string.
602 * @param {string} text2 Second string.
603 * @return {number} The number of characters common to the start of each
604 * string.
606 function diff_commonPrefix($text1, $text2) {
607 for ($i = 0; 1; $i++) {
608 $t1 = mb_substr($text1, $i, 1);
609 $t2 = mb_substr($text2, $i, 1);
610 if($t1==='' || $t2==='' || $t1 !== $t2 ){
611 return $i;
617 * Determine the common suffix of two strings
618 * @param {string} text1 First string.
619 * @param {string} text2 Second string.
620 * @return {number} The number of characters common to the end of each string.
622 function diff_commonSuffix($text1, $text2) {
623 return $this->diff_commonPrefix( strrev($text1), strrev($text2) );
627 * Do the two texts share a mb_substring which is at least half the length of the
628 * longer text?
629 * @param {string} text1 First string.
630 * @param {string} text2 Second string.
631 * @return {Array.<string>?} Five element Array, containing the prefix of
632 * text1, the suffix of text1, the prefix of text2, the suffix of
633 * text2 and the common middle. Or null if there was no match.
635 function diff_halfMatch($text1, $text2) {
636 $longtext = mb_strlen($text1) > mb_strlen($text2) ? $text1 : $text2;
637 $shorttext = mb_strlen($text1) > mb_strlen($text2) ? $text2 : $text1;
638 if (mb_strlen($longtext) < 10 || mb_strlen($shorttext) < 1) {
639 return null; // Pointless.
642 // First check if the second quarter is the seed for a half-match.
643 $hm1 = $this->diff_halfMatchI($longtext, $shorttext, ceil(mb_strlen($longtext) / 4));
644 // Check again based on the third quarter.
645 $hm2 = $this->diff_halfMatchI($longtext, $shorttext, ceil(mb_strlen($longtext) / 2));
647 if (!$hm1 && !$hm2) {
648 return null;
649 } elseif (!$hm2) {
650 $hm = $hm1;
651 } elseif (!$hm1) {
652 $hm = $hm2;
653 } else {
654 // Both matched. Select the longest.
655 $hm = mb_strlen($hm1[4]) > mb_strlen($hm2[4]) ? $hm1 : $hm2;
658 // A half-match was found, sort out the return data.
659 if (mb_strlen($text1) > mb_strlen($text2)) {
660 $text1_a = $hm[0];
661 $text1_b = $hm[1];
662 $text2_a = $hm[2];
663 $text2_b = $hm[3];
664 } else {
665 $text2_a = $hm[0];
666 $text2_b = $hm[1];
667 $text1_a = $hm[2];
668 $text1_b = $hm[3];
670 $mid_common = $hm[4];
671 return array( $text1_a, $text1_b, $text2_a, $text2_b, $mid_common );
675 * Does a mb_substring of shorttext exist within longtext such that the mb_substring
676 * is at least half the length of longtext?
677 * Closure, but does not reference any external variables.
678 * @param {string} longtext Longer string.
679 * @param {string} shorttext Shorter string.
680 * @param {number} i Start index of quarter length mb_substring within longtext
681 * @return {Array.<string>?} Five element Array, containing the prefix of
682 * longtext, the suffix of longtext, the prefix of shorttext, the suffix
683 * of shorttext and the common middle. Or null if there was no match.
684 * @private
686 function diff_halfMatchI($longtext, $shorttext, $i) {
687 // Start with a 1/4 length mb_substring at position i as a seed.
688 $seed = mb_substr($longtext, $i, floor(mb_strlen($longtext) / 4));
690 $j = -1;
691 $best_common = '';
692 $best_longtext_a = null;
693 $best_longtext_b = null;
694 $best_shorttext_a = null;
695 $best_shorttext_b = null;
696 while ( ($j = mb_strpos($shorttext, $seed, $j + 1)) !== false ) {
697 $prefixLength = $this->diff_commonPrefix(mb_substr($longtext, $i), mb_substr($shorttext, $j));
698 $suffixLength = $this->diff_commonSuffix(mb_substr($longtext, 0, $i), mb_substr($shorttext, 0, $j));
699 if (mb_strlen($best_common) < $suffixLength + $prefixLength) {
700 $best_common = mb_substr($shorttext, $j - $suffixLength, $suffixLength) . mb_substr($shorttext, $j, $prefixLength);
701 $best_longtext_a = mb_substr($longtext, 0, $i - $suffixLength);
702 $best_longtext_b = mb_substr($longtext, $i + $prefixLength);
703 $best_shorttext_a = mb_substr($shorttext, 0, $j - $suffixLength);
704 $best_shorttext_b = mb_substr($shorttext, $j + $prefixLength);
707 if (mb_strlen($best_common) >= mb_strlen($longtext) / 2) {
708 return array (
709 $best_longtext_a,
710 $best_longtext_b,
711 $best_shorttext_a,
712 $best_shorttext_b,
713 $best_common
715 } else {
716 return null;
721 * Reduce the number of edits by eliminating semantically trivial equalities.
722 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
724 function diff_cleanupSemantic(&$diffs) {
725 $changes = false;
726 $equalities = array (); // Stack of indices where equalities are found.
727 $equalitiesLength = 0; // Keeping our own length var is faster in JS.
728 $lastequality = null; // Always equal to equalities[equalitiesLength-1][1]
729 $pointer = 0; // Index of current position.
730 // Number of characters that changed prior to the equality.
731 $length_changes1 = 0;
732 // Number of characters that changed after the equality.
733 $length_changes2 = 0;
734 while ($pointer < count($diffs)) {
735 if ($diffs[$pointer][0] == DIFF_EQUAL) { // equality found
736 $equalities[$equalitiesLength++] = $pointer;
737 $length_changes1 = $length_changes2;
738 $length_changes2 = 0;
739 $lastequality = $diffs[$pointer][1];
740 } else { // an insertion or deletion
741 $length_changes2 += mb_strlen($diffs[$pointer][1]);
742 if ($lastequality !== null && (mb_strlen($lastequality) <= $length_changes1) && (mb_strlen($lastequality) <= $length_changes2)) {
743 // Duplicate record
744 $zzz_diffs = array_splice($diffs, $equalities[$equalitiesLength -1], 0, array(array (
745 DIFF_DELETE,
746 $lastequality
747 )));
748 // Change second copy to insert.
749 $diffs[$equalities[$equalitiesLength -1] + 1][0] = DIFF_INSERT;
750 // Throw away the equality we just deleted.
751 $equalitiesLength--;
752 // Throw away the previous equality (it needs to be reevaluated).
753 $equalitiesLength--;
754 $pointer = $equalitiesLength > 0 ? $equalities[$equalitiesLength -1] : -1;
755 $length_changes1 = 0; // Reset the counters.
756 $length_changes2 = 0;
757 $lastequality = null;
758 $changes = true;
761 $pointer++;
763 if ($changes) {
764 $this->diff_cleanupMerge($diffs);
766 $this->diff_cleanupSemanticLossless($diffs);
770 * Look for single edits surrounded on both sides by equalities
771 * which can be shifted sideways to align the edit to a word boundary.
772 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
773 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
775 function diff_cleanupSemanticLossless(&$diffs) {
777 $pointer = 1;
778 // Intentionally ignore the first and last element (don't need checking).
779 while ($pointer < count($diffs) - 1) {
780 if ($diffs[$pointer -1][0] == DIFF_EQUAL && $diffs[$pointer +1][0] == DIFF_EQUAL) {
781 // This is a single edit surrounded by equalities.
782 $equality1 = $diffs[$pointer -1][1];
783 $edit = $diffs[$pointer][1];
784 $equality2 = $diffs[$pointer +1][1];
786 // First, shift the edit as far left as possible.
787 $commonOffset = $this->diff_commonSuffix($equality1, $edit);
788 if ($commonOffset !== '') {
789 $commonString = mb_substr($edit, mb_strlen($edit) - $commonOffset);
790 $equality1 = mb_substr($equality1, 0, mb_strlen($equality1) - $commonOffset);
791 $edit = $commonString . mb_substr($edit, 0, mb_strlen($edit) - $commonOffset);
792 $equality2 = $commonString . $equality2;
795 // Second, step character by character right, looking for the best fit.
796 $bestEquality1 = $equality1;
797 $bestEdit = $edit;
798 $bestEquality2 = $equality2;
799 $bestScore = $this->diff_cleanupSemanticScore($equality1, $edit) + $this->diff_cleanupSemanticScore($edit, $equality2);
800 while (isset($equality2[0]) && $edit[0] === $equality2[0]) {
801 $equality1 .= $edit[0];
802 $edit = mb_substr($edit, 1) . $equality2[0];
803 $equality2 = mb_substr($equality2, 1);
804 $score = $this->diff_cleanupSemanticScore($equality1, $edit) + $this->diff_cleanupSemanticScore($edit, $equality2);
805 // The >= encourages trailing rather than leading whitespace on edits.
806 if ($score >= $bestScore) {
807 $bestScore = $score;
808 $bestEquality1 = $equality1;
809 $bestEdit = $edit;
810 $bestEquality2 = $equality2;
814 if ($diffs[$pointer -1][1] != $bestEquality1) {
815 // We have an improvement, save it back to the diff.
816 if ($bestEquality1) {
817 $diffs[$pointer -1][1] = $bestEquality1;
818 } else {
819 $zzz_diffs = array_splice($diffs, $pointer -1, 1);
820 $pointer--;
822 $diffs[$pointer][1] = $bestEdit;
823 if ($bestEquality2) {
824 $diffs[$pointer +1][1] = $bestEquality2;
825 } else {
826 $zzz_diffs = array_splice($diffs, $pointer +1, 1);
827 $pointer--;
831 $pointer++;
836 * Given two strings, compute a score representing whether the internal
837 * boundary falls on logical boundaries.
838 * Scores range from 5 (best) to 0 (worst).
839 * Closure, makes reference to regex patterns defined above.
840 * @param {string} one First string
841 * @param {string} two Second string
842 * @return {number} The score.
844 function diff_cleanupSemanticScore($one, $two) {
845 // Define some regex patterns for matching boundaries.
846 $punctuation = '/[^a-zA-Z0-9]/';
847 $whitespace = '/\s/';
848 $linebreak = '/[\r\n]/';
849 $blanklineEnd = '/\n\r?\n$/';
850 $blanklineStart = '/^\r?\n\r?\n/';
852 if (!$one || !$two) {
853 // Edges are the best.
854 return 5;
857 // Each port of this function behaves slightly differently due to
858 // subtle differences in each language's definition of things like
859 // 'whitespace'. Since this function's purpose is largely cosmetic,
860 // the choice has been made to use each language's native features
861 // rather than force total conformity.
862 $score = 0;
863 // One point for non-alphanumeric.
864 if (preg_match($punctuation, $one[mb_strlen($one) - 1]) || preg_match($punctuation, $two[0])) {
865 $score++;
866 // Two points for whitespace.
867 if (preg_match($whitespace, $one[mb_strlen($one) - 1] ) || preg_match($whitespace, $two[0])) {
868 $score++;
869 // Three points for line breaks.
870 if (preg_match($linebreak, $one[mb_strlen($one) - 1]) || preg_match($linebreak, $two[0])) {
871 $score++;
872 // Four points for blank lines.
873 if (preg_match($blanklineEnd, $one) || preg_match($blanklineStart, $two)) {
874 $score++;
879 return $score;
883 * Reduce the number of edits by eliminating operationally trivial equalities.
884 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
886 function diff_cleanupEfficiency(&$diffs) {
887 $changes = false;
888 $equalities = array (); // Stack of indices where equalities are found.
889 $equalitiesLength = 0; // Keeping our own length var is faster in JS.
890 $lastequality = ''; // Always equal to equalities[equalitiesLength-1][1]
891 $pointer = 0; // Index of current position.
892 // Is there an insertion operation before the last equality.
893 $pre_ins = false;
894 // Is there a deletion operation before the last equality.
895 $pre_del = false;
896 // Is there an insertion operation after the last equality.
897 $post_ins = false;
898 // Is there a deletion operation after the last equality.
899 $post_del = false;
900 while ($pointer < count($diffs)) {
901 if ($diffs[$pointer][0] == DIFF_EQUAL) { // equality found
902 if (mb_strlen($diffs[$pointer][1]) < $this->Diff_EditCost && ($post_ins || $post_del)) {
903 // Candidate found.
904 $equalities[$equalitiesLength++] = $pointer;
905 $pre_ins = $post_ins;
906 $pre_del = $post_del;
907 $lastequality = $diffs[$pointer][1];
908 } else {
909 // Not a candidate, and can never become one.
910 $equalitiesLength = 0;
911 $lastequality = '';
913 $post_ins = $post_del = false;
914 } else { // an insertion or deletion
915 if ($diffs[$pointer][0] == DIFF_DELETE) {
916 $post_del = true;
917 } else {
918 $post_ins = true;
921 * Five types to be split:
922 * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
923 * <ins>A</ins>X<ins>C</ins><del>D</del>
924 * <ins>A</ins><del>B</del>X<ins>C</ins>
925 * <ins>A</del>X<ins>C</ins><del>D</del>
926 * <ins>A</ins><del>B</del>X<del>C</del>
928 if ($lastequality && (($pre_ins && $pre_del && $post_ins && $post_del) || ((mb_strlen($lastequality) < $this->Diff_EditCost / 2) && ($pre_ins + $pre_del + $post_ins + $post_del) == 3))) {
929 // Duplicate record
930 $zzz_diffs = array_splice($diffs, $equalities[$equalitiesLength -1], 0, array(array (
931 DIFF_DELETE,
932 $lastequality
933 )));
934 // Change second copy to insert.
935 $diffs[$equalities[$equalitiesLength -1] + 1][0] = DIFF_INSERT;
936 $equalitiesLength--; // Throw away the equality we just deleted;
937 $lastequality = '';
938 if ($pre_ins && $pre_del) {
939 // No changes made which could affect previous entry, keep going.
940 $post_ins = $post_del = true;
941 $equalitiesLength = 0;
942 } else {
943 $equalitiesLength--; // Throw away the previous equality;
944 $pointer = $equalitiesLength > 0 ? $equalities[$equalitiesLength -1] : -1;
945 $post_ins = $post_del = false;
947 $changes = true;
950 $pointer++;
953 if ($changes) {
954 $this->diff_cleanupMerge($diffs);
959 * Reorder and merge like edit sections. Merge equalities.
960 * Any edit section can move as long as it doesn't cross an equality.
961 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
963 function diff_cleanupMerge(&$diffs) {
964 array_push($diffs, array ( DIFF_EQUAL, '' )); // Add a dummy entry at the end.
965 $pointer = 0;
966 $count_delete = 0;
967 $count_insert = 0;
968 $text_delete = '';
969 $text_insert = '';
970 $commonlength = null;
971 while ($pointer < count($diffs)) {
972 switch ($diffs[$pointer][0]) {
973 case DIFF_INSERT :
974 $count_insert++;
975 $text_insert .= $diffs[$pointer][1];
976 $pointer++;
977 break;
978 case DIFF_DELETE :
979 $count_delete++;
980 $text_delete .= $diffs[$pointer][1];
981 $pointer++;
982 break;
983 case DIFF_EQUAL :
984 // Upon reaching an equality, check for prior redundancies.
985 if ($count_delete !== 0 || $count_insert !== 0) {
986 if ($count_delete !== 0 && $count_insert !== 0) {
987 // Factor out any common prefixies.
988 $commonlength = $this->diff_commonPrefix($text_insert, $text_delete);
989 if ($commonlength !== 0) {
990 if (($pointer - $count_delete - $count_insert) > 0 && $diffs[$pointer - $count_delete - $count_insert -1][0] == DIFF_EQUAL) {
991 $diffs[$pointer - $count_delete - $count_insert -1][1] .= mb_substr($text_insert, 0, $commonlength);
992 } else {
993 array_splice($diffs, 0, 0, array(array (
994 DIFF_EQUAL,
995 mb_substr($text_insert, 0, $commonlength)
996 )));
997 $pointer++;
999 $text_insert = mb_substr($text_insert, $commonlength);
1000 $text_delete = mb_substr($text_delete, $commonlength);
1002 // Factor out any common suffixies.
1003 $commonlength = $this->diff_commonSuffix($text_insert, $text_delete);
1004 if ($commonlength !== 0) {
1005 $diffs[$pointer][1] = mb_substr($text_insert, mb_strlen($text_insert) - $commonlength) . $diffs[$pointer][1];
1006 $text_insert = mb_substr($text_insert, 0, mb_strlen($text_insert) - $commonlength);
1007 $text_delete = mb_substr($text_delete, 0, mb_strlen($text_delete) - $commonlength);
1010 // Delete the offending records and add the merged ones.
1011 if ($count_delete === 0) {
1012 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+$count_insert, array(array(
1013 DIFF_INSERT,
1014 $text_insert
1015 )));
1016 } elseif ($count_insert === 0) {
1017 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+$count_insert, array(array(
1018 DIFF_DELETE,
1019 $text_delete
1020 )));
1021 } else {
1022 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+$count_insert, array(array(
1023 DIFF_DELETE,
1024 $text_delete
1025 ), array (
1026 DIFF_INSERT,
1027 $text_insert
1028 )));
1030 $pointer = $pointer - $count_delete - $count_insert + ($count_delete ? 1 : 0) + ($count_insert ? 1 : 0) + 1;
1031 } elseif ($pointer !== 0 && $diffs[$pointer -1][0] == DIFF_EQUAL) {
1032 // Merge this equality with the previous one.
1033 $diffs[$pointer -1][1] .= $diffs[$pointer][1];
1034 array_splice($diffs, $pointer, 1);
1035 } else {
1036 $pointer++;
1038 $count_insert = 0;
1039 $count_delete = 0;
1040 $text_delete = '';
1041 $text_insert = '';
1042 break;
1045 if ($diffs[count($diffs) - 1][1] === '') {
1046 array_pop($diffs); // Remove the dummy entry at the end.
1049 // Second pass: look for single edits surrounded on both sides by equalities
1050 // which can be shifted sideways to eliminate an equality.
1051 // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1052 $changes = false;
1053 $pointer = 1;
1054 // Intentionally ignore the first and last element (don't need checking).
1055 while ($pointer < count($diffs) - 1) {
1056 if ($diffs[$pointer-1][0] == DIFF_EQUAL && $diffs[$pointer+1][0] == DIFF_EQUAL) {
1057 // This is a single edit surrounded by equalities.
1058 if ( mb_substr($diffs[$pointer][1], mb_strlen($diffs[$pointer][1]) - mb_strlen($diffs[$pointer -1][1])) == $diffs[$pointer -1][1]) {
1059 // Shift the edit over the previous equality.
1060 $diffs[$pointer][1] = $diffs[$pointer -1][1] . mb_substr($diffs[$pointer][1], 0, mb_strlen($diffs[$pointer][1]) - mb_strlen($diffs[$pointer -1][1]));
1061 $diffs[$pointer +1][1] = $diffs[$pointer -1][1] . $diffs[$pointer +1][1];
1062 array_splice($diffs, $pointer -1, 1);
1063 $changes = true;
1064 } elseif (mb_substr($diffs[$pointer][1], 0, mb_strlen($diffs[$pointer +1][1])) == $diffs[$pointer +1][1]) {
1065 // Shift the edit over the next equality.
1066 $diffs[$pointer -1][1] .= $diffs[$pointer +1][1];
1068 $diffs[$pointer][1] = mb_substr($diffs[$pointer][1], mb_strlen($diffs[$pointer +1][1])) . $diffs[$pointer +1][1];
1069 array_splice($diffs, $pointer +1, 1);
1070 $changes = true;
1073 $pointer++;
1075 // If shifts were made, the diff needs reordering and another shift sweep.
1076 if ($changes) {
1077 $this->diff_cleanupMerge($diffs);
1082 * loc is a location in text1, compute and return the equivalent location in
1083 * text2.
1084 * e.g. 'The cat' vs 'The big cat', 1->1, 5->8
1085 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1086 * @param {number} loc Location within text1.
1087 * @return {number} Location within text2.
1089 function diff_xIndex($diffs, $loc) {
1090 $chars1 = 0;
1091 $chars2 = 0;
1092 $last_chars1 = 0;
1093 $last_chars2 = 0;
1094 for ($x = 0; $x < count($diffs); $x++) {
1095 if ($diffs[$x][0] !== DIFF_INSERT) { // Equality or deletion.
1096 $chars1 += mb_strlen($diffs[$x][1]);
1098 if ($diffs[$x][0] !== DIFF_DELETE) { // Equality or insertion.
1099 $chars2 += mb_strlen($diffs[$x][1]);
1101 if ($chars1 > $loc) { // Overshot the location.
1102 break;
1104 $last_chars1 = $chars1;
1105 $last_chars2 = $chars2;
1107 // Was the location was deleted?
1108 if (count($diffs) != $x && $diffs[$x][0] === DIFF_DELETE) {
1109 return $last_chars2;
1111 // Add the remaining character length.
1112 return $last_chars2 + ($loc - $last_chars1);
1116 * Convert a diff array into a pretty HTML report.
1117 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1118 * @return {string} HTML representation.
1120 function diff_prettyHtml($diffs) {
1121 $html = array ();
1122 $i = 0;
1123 for ($x = 0; $x < count($diffs); $x++) {
1124 $op = $diffs[$x][0]; // Operation (insert, delete, equal)
1125 $data = $diffs[$x][1]; // Text of change.
1126 $text = preg_replace(array (
1127 '/&/',
1128 '/</',
1129 '/>/',
1130 "/\n/"
1131 ), array (
1132 '&amp;',
1133 '&lt;',
1134 '&gt;',
1135 '&para;<BR>'
1136 ), $data);
1138 switch ($op) {
1139 case DIFF_INSERT :
1140 $html[$x] = '<INS STYLE="background:#E6FFE6;" TITLE="i=' . $i . '">' . $text . '</INS>';
1141 break;
1142 case DIFF_DELETE :
1143 $html[$x] = '<DEL STYLE="background:#FFE6E6;" TITLE="i=' . $i . '">' . $text . '</DEL>';
1144 break;
1145 case DIFF_EQUAL :
1146 $html[$x] = '<SPAN TITLE="i=' . $i . '">' . $text . '</SPAN>';
1147 break;
1149 if ($op !== DIFF_DELETE) {
1150 $i += mb_strlen($data);
1153 return implode('',$html);
1157 * Compute and return the source text (all equalities and deletions).
1158 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1159 * @return {string} Source text.
1161 function diff_text1($diffs) {
1162 $text = array ();
1163 for ($x = 0; $x < count($diffs); $x++) {
1164 if ($diffs[$x][0] !== DIFF_INSERT) {
1165 $text[$x] = $diffs[$x][1];
1168 return implode('',$text);
1172 * Compute and return the destination text (all equalities and insertions).
1173 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1174 * @return {string} Destination text.
1176 function diff_text2($diffs) {
1177 $text = array ();
1178 for ($x = 0; $x < count($diffs); $x++) {
1179 if ($diffs[$x][0] !== DIFF_DELETE) {
1180 $text[$x] = $diffs[$x][1];
1183 return implode('',$text);
1187 * Compute the Levenshtein distance; the number of inserted, deleted or
1188 * substituted characters.
1189 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1190 * @return {number} Number of changes.
1192 function diff_levenshtein($diffs) {
1193 $levenshtein = 0;
1194 $insertions = 0;
1195 $deletions = 0;
1196 for ($x = 0; $x < count($diffs); $x++) {
1197 $op = $diffs[$x][0];
1198 $data = $diffs[$x][1];
1199 switch ($op) {
1200 case DIFF_INSERT :
1201 $insertions += mb_strlen($data);
1202 break;
1203 case DIFF_DELETE :
1204 $deletions += mb_strlen($data);
1205 break;
1206 case DIFF_EQUAL :
1207 // A deletion and an insertion is one substitution.
1208 $levenshtein += max($insertions, $deletions);
1209 $insertions = 0;
1210 $deletions = 0;
1211 break;
1214 $levenshtein += max($insertions, $deletions);
1215 return $levenshtein;
1219 * Crush the diff into an encoded string which describes the operations
1220 * required to transform text1 into text2.
1221 * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1222 * Operations are tab-separated. Inserted text is escaped using %xx notation.
1223 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
1224 * @return {string} Delta text.
1226 function diff_toDelta($diffs) {
1227 $text = array ();
1228 for ($x = 0; $x < count($diffs); $x++) {
1229 switch ($diffs[$x][0]) {
1230 case DIFF_INSERT :
1231 $text[$x] = '+' .encodeURI($diffs[$x][1]);
1232 break;
1233 case DIFF_DELETE :
1234 $text[$x] = '-' .mb_strlen($diffs[$x][1]);
1235 break;
1236 case DIFF_EQUAL :
1237 $text[$x] = '=' .mb_strlen($diffs[$x][1]);
1238 break;
1241 return str_replace('%20', ' ', implode("\t", $text));
1245 * Given the original text1, and an encoded string which describes the
1246 * operations required to transform text1 into text2, compute the full diff.
1247 * @param {string} text1 Source string for the diff.
1248 * @param {string} delta Delta text.
1249 * @return {Array.<Array.<number|string>>} Array of diff tuples.
1250 * @throws {Error} If invalid input.
1252 function diff_fromDelta($text1, $delta) {
1253 $diffs = array ();
1254 $diffsLength = 0; // Keeping our own length var is faster in JS.
1255 $pointer = 0; // Cursor in text1
1256 $tokens = preg_split("/\t/", $delta);
1258 for ($x = 0; $x < count($tokens); $x++) {
1259 // Each token begins with a one character parameter which specifies the
1260 // operation of this token (delete, insert, equality).
1261 $param = mb_substr($tokens[$x], 1);
1262 switch ($tokens[$x][0]) {
1263 case '+' :
1264 try {
1265 $diffs[$diffsLength++] = array (
1266 DIFF_INSERT,
1267 decodeURI($param)
1269 } catch (Exception $ex) {
1270 echo_Exception('Illegal escape in diff_fromDelta: ' . $param);
1271 // Malformed URI sequence.
1273 break;
1274 case '-' :
1275 // Fall through.
1276 case '=' :
1277 $n = (int) $param;
1278 if ($n < 0) {
1279 echo_Exception('Invalid number in diff_fromDelta: ' . $param);
1281 $text = mb_substr($text1, $pointer, $n);
1282 $pointer += $n;
1283 if ($tokens[$x][0] == '=') {
1284 $diffs[$diffsLength++] = array (
1285 DIFF_EQUAL,
1286 $text
1288 } else {
1289 $diffs[$diffsLength++] = array (
1290 DIFF_DELETE,
1291 $text
1294 break;
1295 default :
1296 // Blank tokens are ok (from a trailing \t).
1297 // Anything else is an error.
1298 if ($tokens[$x]) {
1299 echo_Exception('Invalid diff operation in diff_fromDelta: ' . $tokens[$x]);
1303 if ($pointer != mb_strlen($text1)) {
1304 // throw new Exception('Delta length (' . $pointer . ') does not equal source text length (' . mb_strlen($text1) . ').');
1305 echo_Exception('Delta length (' . $pointer . ') does not equal source text length (' . mb_strlen($text1) . ').');
1307 return $diffs;
1310 // MATCH FUNCTIONS
1313 * Locate the best instance of 'pattern' in 'text' near 'loc'.
1314 * @param {string} text The text to search.
1315 * @param {string} pattern The pattern to search for.
1316 * @param {number} loc The location to search around.
1317 * @return {number} Best match index or -1.
1319 function match_main($text, $pattern, $loc) {
1320 $loc = max(0, min($loc, mb_strlen($text)));
1321 if ($text == $pattern) {
1322 // Shortcut (potentially not guaranteed by the algorithm)
1323 return 0;
1325 elseif (!mb_strlen($text)) {
1326 // Nothing to match.
1327 return -1;
1329 elseif (mb_substr($text, $loc, mb_strlen($pattern)) == $pattern) {
1330 // Perfect match at the perfect spot! (Includes case of null pattern)
1331 return $loc;
1332 } else {
1333 // Do a fuzzy compare.
1334 return $this->match_bitap($text, $pattern, $loc);
1339 * Locate the best instance of 'pattern' in 'text' near 'loc' using the
1340 * Bitap algorithm.
1341 * @param {string} text The text to search.
1342 * @param {string} pattern The pattern to search for.
1343 * @param {number} loc The location to search around.
1344 * @return {number} Best match index or -1.
1345 * @private
1347 function match_bitap($text, $pattern, $loc) {
1348 if (mb_strlen($pattern) > Match_MaxBits) {
1349 echo_Exception('Pattern too long for this browser.');
1352 // Initialise the alphabet.
1353 $s = $this->match_alphabet($pattern);
1355 // Highest score beyond which we give up.
1356 $score_threshold = $this->Match_Threshold;
1358 // Is there a nearby exact match? (speedup)
1359 $best_loc = mb_strpos($text, $pattern, $loc);
1360 if ($best_loc !== false) {
1361 $score_threshold = min($this->match_bitapScore(0, $best_loc, $pattern, $loc), $score_threshold);
1364 // What about in the other direction? (speedup)
1365 $best_loc = mb_strrpos( $text, $pattern, min($loc + mb_strlen($pattern), mb_strlen($text)) );
1366 if ($best_loc !== false) {
1367 $score_threshold = min($this->match_bitapScore(0, $best_loc, $pattern, $loc), $score_threshold);
1370 // Initialise the bit arrays.
1371 $matchmask = 1 << (mb_strlen($pattern) - 1);
1372 $best_loc = -1;
1374 $bin_min = null;
1375 $bin_mid = null;
1376 $bin_max = mb_strlen($pattern) + mb_strlen($text);
1377 $last_rd = null;
1378 for ($d = 0; $d < mb_strlen($pattern); $d++) {
1379 // Scan for the best match; each iteration allows for one more error.
1380 // Run a binary search to determine how far from 'loc' we can stray at this
1381 // error level.
1382 $bin_min = 0;
1383 $bin_mid = $bin_max;
1384 while ($bin_min < $bin_mid) {
1385 if ($this->match_bitapScore($d, $loc + $bin_mid, $pattern, $loc) <= $score_threshold) {
1386 $bin_min = $bin_mid;
1387 } else {
1388 $bin_max = $bin_mid;
1390 $bin_mid = floor(($bin_max - $bin_min) / 2 + $bin_min);
1392 // Use the result from this iteration as the maximum for the next.
1393 $bin_max = $bin_mid;
1394 $start = max(1, $loc - $bin_mid +1);
1395 $finish = min($loc + $bin_mid, mb_strlen($text)) + mb_strlen($pattern);
1397 $rd = Array (
1398 $finish +2
1400 $rd[$finish +1] = (1 << $d) - 1;
1401 for ($j = $finish; $j >= $start; $j--) {
1402 // The alphabet (s) is a sparse hash, so the following line generates
1403 // warnings.
1404 @ $charMatch = $s[ $text[$j -1] ];
1405 if ($d === 0) { // First pass: exact match.
1406 $rd[$j] = (($rd[$j +1] << 1) | 1) & $charMatch;
1407 } else { // Subsequent passes: fuzzy match.
1408 $rd[$j] = (($rd[$j +1] << 1) | 1) & $charMatch | ((($last_rd[$j +1] | $last_rd[$j]) << 1) | 1) | $last_rd[$j +1];
1410 if ($rd[$j] & $matchmask) {
1411 $score = $this->match_bitapScore($d, $j -1, $pattern, $loc);
1412 // This match will almost certainly be better than any existing match.
1413 // But check anyway.
1414 if ($score <= $score_threshold) {
1415 // Told you so.
1416 $score_threshold = $score;
1417 $best_loc = $j -1;
1418 if ($best_loc > $loc) {
1419 // When passing loc, don't exceed our current distance from loc.
1420 $start = max(1, 2 * $loc - $best_loc);
1421 } else {
1422 // Already passed loc, downhill from here on in.
1423 break;
1428 // No hope for a (better) match at greater error levels.
1429 if ($this->match_bitapScore($d +1, $loc, $pattern, $loc) > $score_threshold) {
1430 break;
1432 $last_rd = $rd;
1434 return (int)$best_loc;
1438 * Compute and return the score for a match with e errors and x location.
1439 * Accesses loc and pattern through being a closure.
1440 * @param {number} e Number of errors in match.
1441 * @param {number} x Location of match.
1442 * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
1443 * @private
1445 function match_bitapScore($e, $x, $pattern, $loc) {
1446 $accuracy = $e / mb_strlen($pattern);
1447 $proximity = abs($loc - $x);
1448 if (!$this->Match_Distance) {
1449 // Dodge divide by zero error.
1450 return $proximity ? 1.0 : $accuracy;
1452 return $accuracy + ($proximity / $this->Match_Distance);
1456 * Initialise the alphabet for the Bitap algorithm.
1457 * @param {string} pattern The text to encode.
1458 * @return {Object} Hash of character locations.
1459 * @private
1461 function match_alphabet($pattern) {
1462 $s = array ();
1463 for ($i = 0; $i < mb_strlen($pattern); $i++) {
1464 $s[ $pattern[$i] ] = 0;
1466 for ($i = 0; $i < mb_strlen($pattern); $i++) {
1467 $s[ $pattern[$i] ] |= 1 << (mb_strlen($pattern) - $i - 1);
1469 return $s;
1472 // PATCH FUNCTIONS
1475 * Increase the context until it is unique,
1476 * but don't let the pattern expand beyond Match_MaxBits.
1477 * @param {patch_obj} patch The patch to grow.
1478 * @param {string} text Source text.
1479 * @private
1481 function patch_addContext($patch, $text) {
1482 $pattern = mb_substr($text, $patch->start2, $patch->length1 );
1483 $previousPattern = null;
1484 $padding = 0;
1485 $i = 0;
1486 while (
1487 ( mb_strlen($pattern) === 0 // Javascript's indexOf/lastIndexOd return 0/strlen respectively if pattern = ''
1488 || mb_strpos($text, $pattern) !== mb_strrpos($text, $pattern)
1490 && $pattern !== $previousPattern // avoid infinte loop
1491 && mb_strlen($pattern) < Match_MaxBits - $this->Patch_Margin - $this->Patch_Margin ) {
1492 $padding += $this->Patch_Margin;
1493 $previousPattern = $pattern;
1494 $pattern = mb_substr($text, max($patch->start2 - $padding,0), ($patch->start2 + $patch->length1 + $padding) - max($patch->start2 - $padding,0) );
1496 // Add one chunk for good luck.
1497 $padding += $this->Patch_Margin;
1498 // Add the prefix.
1499 $prefix = mb_substr($text, max($patch->start2 - $padding,0), $patch->start2 - max($patch->start2 - $padding,0) );
1500 if ($prefix!=='') {
1501 array_unshift($patch->diffs, array (
1502 DIFF_EQUAL,
1503 $prefix
1506 // Add the suffix.
1507 $suffix = mb_substr($text, $patch->start2 + $patch->length1, ($patch->start2 + $patch->length1 + $padding) - ($patch->start2 + $patch->length1) );
1508 if ($suffix!=='') {
1509 array_push($patch->diffs, array (
1510 DIFF_EQUAL,
1511 $suffix
1515 // Roll back the start points.
1516 $patch->start1 -= mb_strlen($prefix);
1517 $patch->start2 -= mb_strlen($prefix);
1518 // Extend the lengths.
1519 $patch->length1 += mb_strlen($prefix) + mb_strlen($suffix);
1520 $patch->length2 += mb_strlen($prefix) + mb_strlen($suffix);
1524 * Compute a list of patches to turn text1 into text2.
1525 * Use diffs if provided, otherwise compute it ourselves.
1526 * There are four ways to call this function, depending on what data is
1527 * available to the caller:
1528 * Method 1:
1529 * a = text1, b = text2
1530 * Method 2:
1531 * a = diffs
1532 * Method 3 (optimal):
1533 * a = text1, b = diffs
1534 * Method 4 (deprecated, use method 3):
1535 * a = text1, b = text2, c = diffs
1537 * @param {string|Array.<Array.<number|string>>} a text1 (methods 1,3,4) or
1538 * Array of diff tuples for text1 to text2 (method 2).
1539 * @param {string|Array.<Array.<number|string>>} opt_b text2 (methods 1,4) or
1540 * Array of diff tuples for text1 to text2 (method 3) or undefined (method 2).
1541 * @param {string|Array.<Array.<number|string>>} opt_c Array of diff tuples for
1542 * text1 to text2 (method 4) or undefined (methods 1,2,3).
1543 * @return {Array.<patch_obj>} Array of patch objects.
1545 function patch_make($a, $opt_b = null, $opt_c = null ) {
1546 if (is_string($a) && is_string($opt_b) && $opt_c === null ) {
1547 // Method 1: text1, text2
1548 // Compute diffs from text1 and text2.
1549 $text1 = $a;
1550 $diffs = $this->diff_main($text1, $opt_b, true);
1551 if ( count($diffs) > 2) {
1552 $this->diff_cleanupSemantic($diffs);
1553 $this->diff_cleanupEfficiency($diffs);
1555 } elseif ( is_array($a) && $opt_b === null && $opt_c === null) {
1556 // Method 2: diffs
1557 // Compute text1 from diffs.
1558 $diffs = $a;
1559 $text1 = $this->diff_text1($diffs);
1560 } elseif ( is_string($a) && is_array($opt_b) && $opt_c === null) {
1561 // Method 3: text1, diffs
1562 $text1 = $a;
1563 $diffs = $opt_b;
1564 } elseif ( is_string($a) && is_string($opt_b) && is_array($opt_c) ) {
1565 // Method 4: text1, text2, diffs
1566 // text2 is not used.
1567 $text1 = $a;
1568 $diffs = $opt_c;
1569 } else {
1570 echo_Exception('Unknown call format to patch_make.');
1573 if ( count($diffs) === 0) {
1574 return array(); // Get rid of the null case.
1576 $patches = array();
1577 $patch = new patch_obj();
1578 $patchDiffLength = 0; // Keeping our own length var is faster in JS.
1579 $char_count1 = 0; // Number of characters into the text1 string.
1580 $char_count2 = 0; // Number of characters into the text2 string.
1581 // Start with text1 (prepatch_text) and apply the diffs until we arrive at
1582 // text2 (postpatch_text). We recreate the patches one by one to determine
1583 // context info.
1584 $prepatch_text = $text1;
1585 $postpatch_text = $text1;
1586 for ($x = 0; $x < count($diffs); $x++) {
1587 $diff_type = $diffs[$x][0];
1588 $diff_text = $diffs[$x][1];
1590 if (!$patchDiffLength && $diff_type !== DIFF_EQUAL) {
1591 // A new patch starts here.
1592 $patch->start1 = $char_count1;
1593 $patch->start2 = $char_count2;
1596 switch ($diff_type) {
1597 case DIFF_INSERT :
1598 $patch->diffs[$patchDiffLength++] = $diffs[$x];
1599 $patch->length2 += mb_strlen($diff_text);
1600 $postpatch_text = mb_substr($postpatch_text, 0, $char_count2) . $diff_text . mb_substr($postpatch_text,$char_count2);
1601 break;
1602 case DIFF_DELETE :
1603 $patch->length1 += mb_strlen($diff_text);
1604 $patch->diffs[$patchDiffLength++] = $diffs[$x];
1605 $postpatch_text = mb_substr($postpatch_text, 0, $char_count2) . mb_substr($postpatch_text, $char_count2 + mb_strlen($diff_text) );
1606 break;
1607 case DIFF_EQUAL :
1608 if ( mb_strlen($diff_text) <= 2 * $this->Patch_Margin && $patchDiffLength && count($diffs) != $x + 1) {
1609 // Small equality inside a patch.
1610 $patch->diffs[$patchDiffLength++] = $diffs[$x];
1611 $patch->length1 += mb_strlen($diff_text);
1612 $patch->length2 += mb_strlen($diff_text);
1613 } elseif ( mb_strlen($diff_text) >= 2 * $this->Patch_Margin ) {
1614 // Time for a new patch.
1615 if ($patchDiffLength) {
1616 $this->patch_addContext($patch, $prepatch_text);
1617 array_push($patches,$patch);
1618 $patch = new patch_obj();
1619 $patchDiffLength = 0;
1620 // Unlike Unidiff, our patch lists have a rolling context.
1621 // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1622 // Update prepatch text & pos to reflect the application of the
1623 // just completed patch.
1624 $prepatch_text = $postpatch_text;
1625 $char_count1 = $char_count2;
1628 break;
1631 // Update the current character count.
1632 if ($diff_type !== DIFF_INSERT) {
1633 $char_count1 += mb_strlen($diff_text);
1635 if ($diff_type !== DIFF_DELETE) {
1636 $char_count2 += mb_strlen($diff_text);
1639 // Pick up the leftover patch if not empty.
1640 if ($patchDiffLength) {
1641 $this->patch_addContext($patch, $prepatch_text);
1642 array_push($patches, $patch);
1645 return $patches;
1649 * Given an array of patches, return another array that is identical.
1650 * @param {Array.<patch_obj>} patches Array of patch objects.
1651 * @return {Array.<patch_obj>} Array of patch objects.
1653 function patch_deepCopy($patches) {
1654 // Making deep copies is hard in JavaScript.
1655 $patchesCopy = array();
1656 for ($x = 0; $x < count($patches); $x++) {
1657 $patch = $patches[$x];
1658 $patchCopy = new patch_obj();
1659 for ($y = 0; $y < count($patch->diffs); $y++) {
1660 $patchCopy->diffs[$y] = $patch->diffs[$y]; // ?? . slice();
1662 $patchCopy->start1 = $patch->start1;
1663 $patchCopy->start2 = $patch->start2;
1664 $patchCopy->length1 = $patch->length1;
1665 $patchCopy->length2 = $patch->length2;
1666 $patchesCopy[$x] = $patchCopy;
1668 return $patchesCopy;
1672 * Merge a set of patches onto the text. Return a patched text, as well
1673 * as a list of true/false values indicating which patches were applied.
1674 * @param {Array.<patch_obj>} patches Array of patch objects.
1675 * @param {string} text Old text.
1676 * @return {Array.<string|Array.<boolean>>} Two element Array, containing the
1677 * new text and an array of boolean values.
1679 function patch_apply($patches, $text) {
1680 if ( count($patches) == 0) {
1681 return array($text,array());
1684 // Deep copy the patches so that no changes are made to originals.
1685 $patches = $this->patch_deepCopy($patches);
1687 $nullPadding = $this->patch_addPadding($patches);
1688 $text = $nullPadding . $text . $nullPadding;
1690 $this->patch_splitMax($patches);
1691 // delta keeps track of the offset between the expected and actual location
1692 // of the previous patch. If there are patches expected at positions 10 and
1693 // 20, but the first patch was found at 12, delta is 2 and the second patch
1694 // has an effective expected position of 22.
1695 $delta = 0;
1696 $results = array();
1697 for ($x = 0; $x < count($patches) ; $x++) {
1698 $expected_loc = $patches[$x]->start2 + $delta;
1699 $text1 = $this->diff_text1($patches[$x]->diffs);
1700 $start_loc = null;
1701 $end_loc = -1;
1702 if (mb_strlen($text1) > Match_MaxBits) {
1703 // patch_splitMax will only provide an oversized pattern in the case of
1704 // a monster delete.
1705 $start_loc = $this->match_main($text, mb_substr($text1, 0, Match_MaxBits ), $expected_loc);
1706 if ($start_loc != -1) {
1707 $end_loc = $this->match_main($text, mb_substr($text1,mb_strlen($text1) - Match_MaxBits), $expected_loc + mb_strlen($text1) - Match_MaxBits);
1708 if ($end_loc == -1 || $start_loc >= $end_loc) {
1709 // Can't find valid trailing context. Drop this patch.
1710 $start_loc = -1;
1713 } else {
1714 $start_loc = $this->match_main($text, $text1, $expected_loc);
1716 if ($start_loc == -1) {
1717 // No match found. :(
1718 $results[$x] = false;
1719 // Subtract the delta for this failed patch from subsequent patches.
1720 $delta -= $patches[$x]->length2 - $patches[$x]->length1;
1721 } else {
1722 // Found a match. :)
1723 $results[$x] = true;
1724 $delta = $start_loc - $expected_loc;
1725 $text2 = null;
1726 if ($end_loc == -1) {
1727 $text2 = mb_substr($text, $start_loc, mb_strlen($text1) );
1728 } else {
1729 $text2 = mb_substr($text, $start_loc, $end_loc + Match_MaxBits - $start_loc);
1731 if ($text1 == $text2) {
1732 // Perfect match, just shove the replacement text in.
1733 $text = mb_substr($text, 0, $start_loc) . $this->diff_text2($patches[$x]->diffs) . mb_substr($text,$start_loc + mb_strlen($text1) );
1734 } else {
1735 // Imperfect match. Run a diff to get a framework of equivalent
1736 // indices.
1737 $diffs = $this->diff_main($text1, $text2, false);
1738 if ( mb_strlen($text1) > Match_MaxBits && $this->diff_levenshtein($diffs) / mb_strlen($text1) > $this->Patch_DeleteThreshold) {
1739 // The end points match, but the content is unacceptably bad.
1740 $results[$x] = false;
1741 } else {
1742 $this->diff_cleanupSemanticLossless($diffs);
1743 $index1 = 0;
1744 $index2 = NULL;
1745 for ($y = 0; $y < count($patches[$x]->diffs); $y++) {
1746 $mod = $patches[$x]->diffs[$y];
1747 if ($mod[0] !== DIFF_EQUAL) {
1748 $index2 = $this->diff_xIndex($diffs, $index1);
1750 if ($mod[0] === DIFF_INSERT) { // Insertion
1751 $text = mb_substr($text, 0, $start_loc + $index2) . $mod[1] . mb_substr($text, $start_loc + $index2);
1752 } elseif ($mod[0] === DIFF_DELETE) { // Deletion
1753 $text = mb_substr($text, 0, $start_loc + $index2) . mb_substr($text,$start_loc + $this->diff_xIndex($diffs, $index1 + mb_strlen($mod[1]) ));
1755 if ($mod[0] !== DIFF_DELETE) {
1756 $index1 += mb_strlen($mod[1]);
1763 // Strip the padding off.
1764 $text = mb_substr($text, mb_strlen($nullPadding), mb_strlen($text) - 2*mb_strlen($nullPadding) );
1765 return array($text, $results);
1769 * Add some padding on text start and end so that edges can match something.
1770 * Intended to be called only from within patch_apply.
1771 * @param {Array.<patch_obj>} patches Array of patch objects.
1772 * @return {string} The padding string added to each side.
1774 function patch_addPadding(&$patches){
1775 $paddingLength = $this->Patch_Margin;
1776 $nullPadding = '';
1777 for ($x = 1; $x <= $paddingLength; $x++) {
1778 $nullPadding .= mb_chr($x);
1781 // Bump all the patches forward.
1782 for ($x = 0; $x < count($patches); $x++) {
1783 $patches[$x]->start1 += $paddingLength;
1784 $patches[$x]->start2 += $paddingLength;
1787 // Add some padding on start of first diff.
1788 $patch = &$patches[0];
1789 $diffs = &$patch->diffs;
1790 if (count($diffs) == 0 || $diffs[0][0] != DIFF_EQUAL) {
1791 // Add nullPadding equality.
1792 array_unshift($diffs, array(DIFF_EQUAL, $nullPadding));
1793 $patch->start1 -= $paddingLength; // Should be 0.
1794 $patch->start2 -= $paddingLength; // Should be 0.
1795 $patch->length1 += $paddingLength;
1796 $patch->length2 += $paddingLength;
1797 } elseif ($paddingLength > mb_strlen($diffs[0][1]) ) {
1798 // Grow first equality.
1799 $extraLength = $paddingLength - mb_strlen($diffs[0][1]);
1800 $diffs[0][1] = mb_substr( $nullPadding , mb_strlen($diffs[0][1]) ) . $diffs[0][1];
1801 $patch->start1 -= $extraLength;
1802 $patch->start2 -= $extraLength;
1803 $patch->length1 += $extraLength;
1804 $patch->length2 += $extraLength;
1807 // Add some padding on end of last diff.
1808 $patch = &$patches[count($patches) - 1];
1809 $diffs = &$patch->diffs;
1810 if ( count($diffs) == 0 || $diffs[ count($diffs) - 1][0] != DIFF_EQUAL) {
1811 // Add nullPadding equality.
1812 array_push($diffs, array(DIFF_EQUAL, $nullPadding) );
1813 $patch->length1 += $paddingLength;
1814 $patch->length2 += $paddingLength;
1815 } elseif ($paddingLength > mb_strlen( $diffs[count($diffs)-1][1] ) ) {
1816 // Grow last equality.
1817 $extraLength = $paddingLength - mb_strlen( $diffs[count($diffs)-1][1] );
1818 $diffs[ count($diffs)-1][1] .= mb_substr($nullPadding,0,$extraLength);
1819 $patch->length1 += $extraLength;
1820 $patch->length2 += $extraLength;
1823 return $nullPadding;
1827 * Look through the patches and break up any which are longer than the maximum
1828 * limit of the match algorithm.
1829 * @param {Array.<patch_obj>} patches Array of patch objects.
1831 function patch_splitMax(&$patches) {
1832 for ($x = 0; $x < count($patches); $x++) {
1833 if ( $patches[$x]->length1 > Match_MaxBits) {
1834 $bigpatch = $patches[$x];
1835 // Remove the big old patch.
1836 array_splice($patches,$x--,1);
1837 $patch_size = Match_MaxBits;
1838 $start1 = $bigpatch->start1;
1839 $start2 = $bigpatch->start2;
1840 $precontext = '';
1841 while ( count($bigpatch->diffs) !== 0) {
1842 // Create one of several smaller patches.
1843 $patch = new patch_obj();
1844 $empty = true;
1845 $patch->start1 = $start1 - mb_strlen($precontext);
1846 $patch->start2 = $start2 - mb_strlen($precontext);
1847 if ($precontext !== '') {
1848 $patch->length1 = $patch->length2 = mb_strlen($precontext);
1849 array_push($patch->diffs, array(DIFF_EQUAL, $precontext) );
1851 while ( count($bigpatch->diffs) !== 0 && $patch->length1 < $patch_size - $this->Patch_Margin) {
1852 $diff_type = $bigpatch->diffs[0][0];
1853 $diff_text = $bigpatch->diffs[0][1];
1854 if ($diff_type === DIFF_INSERT) {
1855 // Insertions are harmless.
1856 $patch->length2 += mb_strlen($diff_text);
1857 $start2 += mb_strlen($diff_text);
1858 array_push($patch->diffs, array_shift($bigpatch->diffs) );
1859 $empty = false;
1860 } else
1861 if ($diff_type === DIFF_DELETE && count($patch->diffs) == 1 && $patch->diffs[0][0] == DIFF_EQUAL && (mb_strlen($diff_text) > 2 * $patch_size) ) {
1862 // This is a large deletion. Let it pass in one chunk.
1863 $patch->length1 += mb_strlen($diff_text);
1864 $start1 += mb_strlen($diff_text);
1865 $empty = false;
1866 array_push( $patch->diffs, array($diff_type, $diff_text) );
1867 array_shift($bigpatch->diffs);
1868 } else {
1869 // Deletion or equality. Only take as much as we can stomach.
1870 $diff_text = mb_substr($diff_text, 0, $patch_size - $patch->length1 - $this->Patch_Margin);
1871 $patch->length1 += mb_strlen($diff_text);
1872 $start1 += mb_strlen($diff_text);
1873 if ($diff_type === DIFF_EQUAL) {
1874 $patch->length2 += mb_strlen($diff_text);
1875 $start2 += mb_strlen($diff_text);
1876 } else {
1877 $empty = false;
1879 array_push($patch->diffs, array($diff_type, $diff_text) );
1880 if ($diff_text == $bigpatch->diffs[0][1]) {
1881 array_shift($bigpatch->diffs);
1882 } else {
1883 $bigpatch->diffs[0][1] = mb_substr( $bigpatch->diffs[0][1],mb_strlen($diff_text) );
1887 // Compute the head context for the next patch.
1888 $precontext = $this->diff_text2($patch->diffs);
1889 $precontext = mb_substr($precontext, mb_strlen($precontext)-$this->Patch_Margin);
1890 // Append the end context for this patch.
1891 $postcontext = mb_substr( $this->diff_text1($bigpatch->diffs), 0, $this->Patch_Margin );
1892 if ($postcontext !== '') {
1893 $patch->length1 += mb_strlen($postcontext);
1894 $patch->length2 += mb_strlen($postcontext);
1895 if ( count($patch->diffs) !== 0 && $patch->diffs[ count($patch->diffs) - 1][0] === DIFF_EQUAL) {
1896 $patch->diffs[ count($patch->diffs) - 1][1] .= $postcontext;
1897 } else {
1898 array_push($patch->diffs, array(DIFF_EQUAL, $postcontext));
1901 if (!$empty) {
1902 array_splice($patches, ++$x, 0, array($patch));
1910 * Take a list of patches and return a textual representation.
1911 * @param {Array.<patch_obj>} patches Array of patch objects.
1912 * @return {string} Text representation of patches.
1914 function patch_toText($patches) {
1915 $text = array();
1916 for ($x = 0; $x < count($patches) ; $x++) {
1917 $text[$x] = $patches[$x];
1919 return implode('',$text);
1923 * Parse a textual representation of patches and return a list of patch objects.
1924 * @param {string} textline Text representation of patches.
1925 * @return {Array.<patch_obj>} Array of patch objects.
1926 * @throws {Error} If invalid input.
1928 function patch_fromText($textline) {
1929 $patches = array();
1930 if ($textline === '') {
1931 return $patches;
1933 $text = explode("\n",$textline);
1934 foreach($text as $i=>$t){ if($t===''){ unset($text[$i]); } }
1935 $textPointer = 0;
1936 while ($textPointer < count($text) ) {
1937 $m = null;
1938 preg_match('/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/',$text[$textPointer],$m);
1939 if (!$m) {
1940 echo_Exception('Invalid patch string: ' . $text[$textPointer]);
1942 $patch = new patch_obj();
1943 array_push($patches, $patch);
1944 @$patch->start1 = (int)$m[1];
1945 if (@$m[2] === '') {
1946 $patch->start1--;
1947 $patch->length1 = 1;
1948 } elseif ( @$m[2] == '0') {
1949 $patch->length1 = 0;
1950 } else {
1951 $patch->start1--;
1952 @$patch->length1 = (int)$m[2];
1955 @$patch->start2 = (int)$m[3];
1956 if (@$m[4] === '') {
1957 $patch->start2--;
1958 $patch->length2 = 1;
1959 } elseif ( @$m[4] == '0') {
1960 $patch->length2 = 0;
1961 } else {
1962 $patch->start2--;
1963 @$patch->length2 = (int)$m[4];
1965 $textPointer++;
1967 while ($textPointer < count($text) ) {
1968 $sign = $text[$textPointer][0];
1969 try {
1970 $line = decodeURI( mb_substr($text[$textPointer],1) );
1971 } catch (Exception $ex) {
1972 // Malformed URI sequence.
1973 throw new Exception('Illegal escape in patch_fromText: ' . $line);
1975 if ($sign == '-') {
1976 // Deletion.
1977 array_push( $patch->diffs, array(DIFF_DELETE, $line) );
1978 } elseif ($sign == '+') {
1979 // Insertion.
1980 array_push($patch->diffs, array(DIFF_INSERT, $line) );
1981 } elseif ($sign == ' ') {
1982 // Minor equality.
1983 array_push($patch->diffs, array(DIFF_EQUAL, $line) );
1984 } elseif ($sign == '@') {
1985 // Start of next patch.
1986 break;
1987 } elseif ($sign === '') {
1988 // Blank line? Whatever.
1989 } else {
1990 // WTF?
1991 echo_Exception('Invalid patch mode "' . $sign . '" in: ' . $line);
1993 $textPointer++;
1996 return $patches;
2001 * Class representing one patch operation.
2002 * @constructor
2004 class patch_obj {
2005 /** @type {Array.<Array.<number|string>>} */
2006 public $diffs = array();
2007 /** @type {number?} */
2008 public $start1 = null;
2009 /** @type {number?} */
2010 public $start2 = null;
2011 /** @type {number} */
2012 public $length1 = 0;
2013 /** @type {number} */
2014 public $length2 = 0;
2017 * Emmulate GNU diff's format.
2018 * Header: @@ -382,8 +481,9 @@
2019 * Indicies are printed as 1-based, not 0-based.
2020 * @return {string} The GNU diff string.
2022 function toString() {
2023 if ($this->length1 === 0) {
2024 $coords1 = $this->start1 . ',0';
2025 } elseif ($this->length1 == 1) {
2026 $coords1 = $this->start1 + 1;
2027 } else {
2028 $coords1 = ($this->start1 + 1) . ',' . $this->length1;
2030 if ($this->length2 === 0) {
2031 $coords2 = $this->start2 . ',0';
2032 } elseif ($this->length2 == 1) {
2033 $coords2 = $this->start2 + 1;
2034 } else {
2035 $coords2 = ($this->start2 + 1) . ',' . $this->length2;
2037 $text = array ( '@@ -' . $coords1 . ' +' . $coords2 . " @@\n" );
2039 // Escape the body of the patch with %xx notation.
2040 for ($x = 0; $x < count($this->diffs); $x++) {
2041 switch ($this->diffs[$x][0]) {
2042 case DIFF_INSERT :
2043 $op = '+';
2044 break;
2045 case DIFF_DELETE :
2046 $op = '-';
2047 break;
2048 case DIFF_EQUAL :
2049 $op = ' ';
2050 break;
2052 $text[$x +1] = $op . encodeURI($this->diffs[$x][1]) . "\n";
2054 return str_replace('%20', ' ', implode('',$text));
2056 function __toString(){
2057 return $this->toString();
2061 define('DIFF_DELETE', -1);
2062 define('DIFF_INSERT', 1);
2063 define('DIFF_EQUAL', 0);
2065 define('Match_MaxBits', PHP_INT_SIZE * 8);
2068 function charCodeAt($str, $pos) {
2069 return mb_ord(mb_substr($str, $pos, 1));
2071 function mb_ord($v) {
2072 $k = mb_convert_encoding($v, 'UCS-2LE', 'UTF-8');
2073 $k1 = ord(substr($k, 0, 1));
2074 $k2 = ord(substr($k, 1, 1));
2075 return $k2 * 256 + $k1;
2077 function mb_chr($num){
2078 return mb_convert_encoding('&#'.intval($num).';', 'UTF-8', 'HTML-ENTITIES');
2082 * as in javascript encodeURI() following the MDN description
2084 * @link https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURI
2085 * @param $url
2086 * @return string
2088 function encodeURI($url) {
2089 return strtr(rawurlencode($url), array (
2090 '%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@', '%26' => '&', '%3D' => '=',
2091 '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*', '%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#',
2095 function decodeURI($encoded) {
2096 static $dontDecode;
2097 if (!$dontDecode) {
2098 $table = array (
2099 '%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@', '%26' => '&', '%3D' => '=',
2100 '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*', '%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#',
2102 $dontDecode = array();
2103 foreach ($table as $k => $v) {
2104 $dontDecode[$k] = encodeURI($k);
2107 return rawurldecode(strtr($encoded, $dontDecode));
2110 function echo_Exception($str){
2111 global $lastException;
2112 $lastException = $str;
2113 echo $str;
2115 //mb_internal_encoding("UTF-8");