Limit total expansion size in StripState and improve limit handling
[lhc/web/wiklou.git] / includes / parser / StripState.php
<?php
/**
 * Holder for stripped items when parsing wiki markup.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Storage for text fragments that were replaced by strip markers during
 * parsing, plus the logic to substitute them back ("unstrip").
 *
 * Items are kept in two categories, 'nowiki' and 'general', each indexed by
 * the key captured from the marker. Unstripping is recursive: a stored value
 * may itself contain markers. Three safeguards apply while unstripping:
 *  - a marker that is already being expanded yields a loop warning,
 *  - nesting deeper than UNSTRIP_RECURSION_LIMIT yields a depth warning,
 *  - a cumulative expanded size above UNSTRIP_SIZE_LIMIT yields a size warning.
 *
 * @ingroup Parser
 */
class StripState {
	/** @var mixed Declared for subclasses; never assigned or read by this class */
	protected $prefix;

	/** @var array[] Stored items as [ type => [ marker key => string|Closure ] ] */
	protected $data;

	/** @var string PCRE pattern matching one strip marker; group 1 is the marker key */
	protected $regex;

	/** @var Parser|null Parser that limitation warnings are reported to, if any */
	protected $parser;

	/** @var bool[] Marker keys currently being expanded, used to detect loops */
	protected $circularRefGuard;

	/** @var int Nesting depth of the unstrip operation currently in progress */
	protected $recursionLevel = 0;

	/** @var int Deepest nesting level seen so far, exposed via getLimitReport() */
	protected $highestRecursionLevel = 0;

	/** @var int Running total of strlen() of every value expanded so far */
	protected $expandSize = 0;

	/** Maximum nesting depth allowed while unstripping */
	const UNSTRIP_RECURSION_LIMIT = 20;

	/** Maximum cumulative size, as measured by strlen(), of expanded values */
	const UNSTRIP_SIZE_LIMIT = 5000000;

	/**
	 * @param Parser|null $parser Parser to notify when a limit is exceeded;
	 *  when omitted, warnings are only embedded in the returned text
	 */
	public function __construct( Parser $parser = null ) {
		$this->data = [
			'nowiki' => [],
			'general' => []
		];
		$this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
		$this->circularRefGuard = [];
		$this->parser = $parser;
	}

	/**
	 * Add a nowiki strip item
	 *
	 * @param string $marker Complete strip marker, including prefix and suffix
	 * @param string|Closure $value Replacement text, or a closure producing it
	 * @throws MWException If $marker does not match the strip marker pattern
	 */
	public function addNoWiki( $marker, $value ) {
		$this->addItem( 'nowiki', $marker, $value );
	}

	/**
	 * Add a general strip item
	 *
	 * @param string $marker Complete strip marker, including prefix and suffix
	 * @param string|Closure $value Replacement text, or a closure producing it
	 * @throws MWException If $marker does not match the strip marker pattern
	 */
	public function addGeneral( $marker, $value ) {
		$this->addItem( 'general', $marker, $value );
	}

	/**
	 * Store a strip item under the key extracted from its marker. An existing
	 * item with the same key and type is overwritten.
	 *
	 * @throws MWException If $marker does not match the strip marker pattern
	 * @param string $type Either 'nowiki' or 'general'
	 * @param string $marker Complete strip marker, including prefix and suffix
	 * @param string|Closure $value Replacement text, or a closure producing it
	 */
	protected function addItem( $type, $marker, $value ) {
		if ( !preg_match( $this->regex, $marker, $m ) ) {
			throw new MWException( "Invalid marker: $marker" );
		}

		$this->data[$type][$m[1]] = $value;
	}

	/**
	 * Replace markers of general items in the text with their stored values.
	 *
	 * @param string $text
	 * @return string|null See unstripType()
	 */
	public function unstripGeneral( $text ) {
		return $this->unstripType( 'general', $text );
	}

	/**
	 * Replace markers of nowiki items in the text with their stored values.
	 *
	 * @param string $text
	 * @return string|null See unstripType()
	 */
	public function unstripNoWiki( $text ) {
		return $this->unstripType( 'nowiki', $text );
	}

	/**
	 * Replace markers of general items first, then markers of nowiki items.
	 *
	 * @param string $text
	 * @return string|null See unstripType()
	 */
	public function unstripBoth( $text ) {
		$text = $this->unstripType( 'general', $text );
		$text = $this->unstripType( 'nowiki', $text );
		return $text;
	}

	/**
	 * Recursively replace every marker of the given type found in the text.
	 * Markers whose key is not stored under $type are left in place.
	 *
	 * @param string $type Either 'nowiki' or 'general'
	 * @param string $text
	 * @return string|null The text with markers substituted; null only if
	 *  preg_replace_callback() itself fails
	 */
	protected function unstripType( $type, $text ) {
		// Shortcut
		if ( !count( $this->data[$type] ) ) {
			return $text;
		}

		$callback = function ( $m ) use ( $type ) {
			$marker = $m[1];
			if ( isset( $this->data[$type][$marker] ) ) {
				// This marker is already being expanded further up the call stack
				if ( isset( $this->circularRefGuard[$marker] ) ) {
					return $this->getWarning( 'parser-unstrip-loop-warning' );
				}

				// Record the depth before the limit check so that the limit
				// report shows the level at which the limit was hit
				if ( $this->recursionLevel > $this->highestRecursionLevel ) {
					$this->highestRecursionLevel = $this->recursionLevel;
				}
				if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) {
					return $this->getLimitationWarning( 'unstrip-depth',
						self::UNSTRIP_RECURSION_LIMIT );
				}

				$value = $this->data[$type][$marker];
				if ( $value instanceof Closure ) {
					// Lazily generated item: evaluated on every expansion
					$value = $value();
				}

				// The size is counted even when the limit is exceeded, so the
				// limit report reflects the amount that was requested
				$this->expandSize += strlen( $value );
				if ( $this->expandSize > self::UNSTRIP_SIZE_LIMIT ) {
					return $this->getLimitationWarning( 'unstrip-size',
						self::UNSTRIP_SIZE_LIMIT );
				}

				$this->circularRefGuard[$marker] = true;
				$this->recursionLevel++;
				$ret = $this->unstripType( $type, $value );
				$this->recursionLevel--;
				unset( $this->circularRefGuard[$marker] );

				return $ret;
			} else {
				return $m[0];
			}
		};

		$text = preg_replace_callback( $this->regex, $callback, $text );
		return $text;
	}

	/**
	 * Get warning HTML and register a limitation warning with the parser
	 *
	 * @param string $type Limit name; "$type-warning" is the message key used
	 * @param int|string $max Limit value passed to the parser and the message
	 * @return string
	 */
	private function getLimitationWarning( $type, $max = '' ) {
		if ( $this->parser ) {
			$this->parser->limitationWarn( $type, $max );
		}
		return $this->getWarning( "$type-warning", $max );
	}

	/**
	 * Get warning HTML
	 *
	 * @param string $message Message key
	 * @param int|string $max Value passed to the message as a numeric parameter
	 * @return string The message text, in the content language, wrapped in
	 *  a span with class "error"
	 */
	private function getWarning( $message, $max = '' ) {
		return '<span class="error">' .
			wfMessage( $message )
				->numParams( $max )->inContentLanguage()->text() .
			'</span>';
	}

	/**
	 * Get an array of parameters to pass to ParserOutput::setLimitReportData()
	 *
	 * @unstable Should only be called by Parser
	 * @return array List of [ report key, [ value reached, limit ] ] pairs
	 */
	public function getLimitReport() {
		return [
			[ 'limitreport-unstrip-depth',
				[
					$this->highestRecursionLevel,
					self::UNSTRIP_RECURSION_LIMIT
				],
			],
			[ 'limitreport-unstrip-size',
				[
					$this->expandSize,
					self::UNSTRIP_SIZE_LIMIT
				],
			]
		];
	}

	/**
	 * Get a StripState object which is sufficient to unstrip the given text.
	 * It will contain the minimum subset of strip items necessary.
	 *
	 * Only markers appearing directly in $text are considered. For each one,
	 * the item is copied from the 'nowiki' set if present there, otherwise
	 * from the 'general' set. The returned object has no parser attached.
	 *
	 * @param string $text
	 *
	 * @return StripState
	 */
	public function getSubState( $text ) {
		$subState = new StripState;
		$prefixLength = strlen( Parser::MARKER_PREFIX );
		$suffixLength = strlen( Parser::MARKER_SUFFIX );
		$pos = 0;
		while ( true ) {
			$startPos = strpos( $text, Parser::MARKER_PREFIX, $pos );
			if ( $startPos === false ) {
				break;
			}

			// Search for the suffix only after the prefix that was just found,
			// so that a stray suffix earlier in the text cannot produce an
			// end position located before the start position.
			$endPos = strpos( $text, Parser::MARKER_SUFFIX, $startPos + $prefixLength );
			if ( $endPos === false ) {
				// No complete marker can follow
				break;
			}

			$endPos += $suffixLength;
			$marker = substr( $text, $startPos, $endPos - $startPos );
			if ( !preg_match( $this->regex, $marker, $m ) ) {
				// Malformed candidate: step past this prefix. Without moving
				// $pos the same span would be examined again forever.
				$pos = $startPos + $prefixLength;
				continue;
			}

			$key = $m[1];
			if ( isset( $this->data['nowiki'][$key] ) ) {
				$subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
			} elseif ( isset( $this->data['general'][$key] ) ) {
				$subState->data['general'][$key] = $this->data['general'][$key];
			}
			$pos = $endPos;
		}
		return $subState;
	}

	/**
	 * Merge another StripState object into this one. The strip marker keys
	 * will not be preserved. The strings in the $texts array will have their
	 * strip markers rewritten, the resulting array of strings will be returned.
	 *
	 * Every imported key is prefixed with a random string generated for this
	 * call, followed by a hyphen.
	 *
	 * @param StripState $otherState
	 * @param array $texts
	 * @return array
	 */
	public function merge( $otherState, $texts ) {
		$mergePrefix = wfRandomString( 16 );

		foreach ( $otherState->data as $type => $items ) {
			foreach ( $items as $key => $value ) {
				$this->data[$type]["$mergePrefix-$key"] = $value;
			}
		}

		// Rewrite the markers in the texts to match the re-keyed items
		$callback = function ( $m ) use ( $mergePrefix ) {
			$key = $m[1];
			return Parser::MARKER_PREFIX . $mergePrefix . '-' . $key . Parser::MARKER_SUFFIX;
		};
		$texts = preg_replace_callback( $otherState->regex, $callback, $texts );
		return $texts;
	}

	/**
	 * Remove any strip markers found in the given text.
	 *
	 * Every substring matching the marker pattern is removed, whether or not
	 * an item is stored for it.
	 *
	 * @param string $text
	 * @return string
	 */
	public function killMarkers( $text ) {
		return preg_replace( $this->regex, '', $text );
	}
}