Merge "Move up devunt's name to Developers"
[lhc/web/wiklou.git] / includes / diff / WordLevelDiff.php
1 <?php
2 /**
3 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * You may copy this code freely under the conditions of the GPL.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup DifferenceEngine
23 * @defgroup DifferenceEngine DifferenceEngine
24 */
25
26 use MediaWiki\Diff\ComplexityException;
27 use MediaWiki\Diff\WordAccumulator;
28
/**
 * Performs a word-level diff on several lines.
 *
 * The input lines are tokenised into words by split(). The parent Diff is run
 * on the "stripped" form of each word (without the optional trailing
 * whitespace character), and the resulting edits are then re-populated with
 * the full words so that orig() and closing() can rebuild the marked-up lines.
 *
 * @ingroup DifferenceEngine
 */
class WordLevelDiff extends \Diff {
	/**
	 * @inheritdoc
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * @param string[] $linesBefore
	 * @param string[] $linesAfter
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
		list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The fallback edit has to be expressed in words, not lines: the
			// loop below takes count( $edit->orig ) entries from the word list,
			// so passing the raw lines here would keep only the first
			// count( $linesBefore ) words and silently drop the rest.
			$this->edits = [ new DiffOpChange( $wordsBefore, $wordsAfter ) ];
		}

		// The edits were computed on the stripped words; swap in the matching
		// run of full words (including trailing whitespace) for each edit.
		$beforeOffset = 0;
		$afterOffset = 0;
		foreach ( $this->edits as $edit ) {
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $beforeOffset, count( $edit->orig ) );
				$beforeOffset += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $afterOffset, count( $edit->closing ) );
				$afterOffset += count( $edit->closing );
			}
		}
	}

	/**
	 * Split lines into words.
	 *
	 * A "\n" token is inserted between consecutive lines (not before the first
	 * one) in both returned lists.
	 *
	 * @param string[] $lines
	 *
	 * @return array[] Two parallel lists: [ full words, stripped words ]. A full
	 *  word is the whole regex match; the stripped word is its first capture
	 *  group, i.e. the match without the optional trailing whitespace character.
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$isFirstLine = true;

		foreach ( $lines as $line ) {
			if ( !$isFirstLine ) {
				// Line boundary marker, present in both lists to keep them aligned
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$isFirstLine = false;

			// A word is a run of non-newline whitespace, a run of word
			// characters (ASCII alphanumerics, "_" and bytes 0x80-0xff), or any
			// single character, optionally followed by one non-newline
			// whitespace character.
			// NOTE(review): "(?!< \n)" is a negative look-ahead for "<\n", which
			// can never fail in front of [^\S\n]; it looks like a typo for the
			// look-behind "(?<! \n)". Kept byte-for-byte so tokenisation does
			// not change.
			$m = [];
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $strippedWord ) {
					$stripped[] = $strippedWord;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Build the "before" side of the diff.
	 *
	 * Words from 'copy' edits are added untagged; the non-empty orig words of
	 * every other edit are added with the 'del' tag.
	 *
	 * @return string[]
	 */
	public function orig() {
		$orig = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$orig->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$orig->addWords( $edit->orig, 'del' );
			}
		}

		return $orig->getLines();
	}

	/**
	 * Build the "after" side of the diff.
	 *
	 * Words from 'copy' edits are added untagged; the non-empty closing words
	 * of every other edit are added with the 'ins' tag.
	 *
	 * @return string[]
	 */
	public function closing() {
		$closing = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type === 'copy' ) {
				$closing->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$closing->addWords( $edit->closing, 'ins' );
			}
		}

		return $closing->getLines();
	}

}