Use a fixed regex for StripState
[lhc/web/wiklou.git] / includes / parser / StripState.php
1 <?php
2 /**
3 * Holder for stripped items when parsing wiki markup.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
/**
 * Holds the content that strip markers stand in for while wiki markup is
 * being parsed, and substitutes that content back on request.
 *
 * Items are kept in two buckets, 'nowiki' and 'general'. Within a bucket an
 * item is keyed by the part of its marker that lies between the state ID and
 * Parser::MARKER_SUFFIX. Only markers carrying this object's ID are stored,
 * replaced or removed; markers with any other ID are passed through as-is.
 *
 * @ingroup Parser
 */
class StripState {
	/** @var string State ID embedded in every marker this object owns */
	protected $id;
	/** @var string Parser::MARKER_PREFIX followed by the state ID */
	protected $prefix;
	/** @var array Stripped items: array( 'nowiki' => array, 'general' => array ) */
	protected $data;
	/** @var string PCRE pattern matching any marker; group 1 = state ID, group 2 = item key */
	protected $regex;

	/** Scratch values handed to the regex callbacks, which cannot take extra arguments */
	protected $tempType, $tempMergePrefix;
	/** @var string|null ID of the state being merged in; only set while merge() runs */
	protected $tempMergeId;
	/** @var array Keys currently being expanded, used to detect self-referencing items */
	protected $circularRefGuard;
	/** @var int Current nesting depth of unstripCallback() */
	protected $recursionLevel = 0;

	/** Maximum nesting depth of markers inside stripped items */
	const UNSTRIP_RECURSION_LIMIT = 20;

	/**
	 * @param string $id State ID that markers must carry to belong to this object
	 */
	public function __construct( $id ) {
		$this->id = $id;
		$this->prefix = Parser::MARKER_PREFIX . $id;
		$this->data = array(
			'nowiki' => array(),
			'general' => array()
		);
		// The pattern does not depend on $id: it matches markers of every
		// state, and callers compare group 1 against the ID they care about.
		$this->regex = "/" . Parser::MARKER_PREFIX .
			'(' . Parser::MARKER_STATE_ID_REGEX . ")([^\x7f]+)" . Parser::MARKER_SUFFIX . '/';
		$this->circularRefGuard = array();
	}

	/**
	 * Add a nowiki strip item
	 *
	 * @param string $marker Complete marker string
	 * @param string $value Content the marker stands for
	 * @throws MWException If $marker is malformed or carries another state's ID
	 */
	public function addNoWiki( $marker, $value ) {
		$this->addItem( 'nowiki', $marker, $value );
	}

	/**
	 * Add a general strip item
	 *
	 * @param string $marker Complete marker string
	 * @param string $value Content the marker stands for
	 * @throws MWException If $marker is malformed or carries another state's ID
	 */
	public function addGeneral( $marker, $value ) {
		$this->addItem( 'general', $marker, $value );
	}

	/**
	 * Store an item in the given bucket, keyed by the marker's key part.
	 *
	 * @throws MWException If $marker is malformed or carries another state's ID
	 * @param string $type Bucket name: 'nowiki' or 'general'
	 * @param string $marker Complete marker string
	 * @param string $value Content the marker stands for
	 */
	protected function addItem( $type, $marker, $value ) {
		if ( !preg_match( $this->regex, $marker, $m ) || $m[1] !== $this->id ) {
			throw new MWException( "Invalid marker: $marker" );
		}

		$this->data[$type][$m[2]] = $value;
	}

	/**
	 * Replace markers of 'general' items with their content.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unstripGeneral( $text ) {
		return $this->unstripType( 'general', $text );
	}

	/**
	 * Replace markers of 'nowiki' items with their content.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unstripNoWiki( $text ) {
		return $this->unstripType( 'nowiki', $text );
	}

	/**
	 * Replace markers of 'general' items, then markers of 'nowiki' items.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unstripBoth( $text ) {
		$text = $this->unstripType( 'general', $text );
		$text = $this->unstripType( 'nowiki', $text );
		return $text;
	}

	/**
	 * Replace every marker in $text that has an item in the given bucket.
	 *
	 * @param string $type Bucket name: 'nowiki' or 'general'
	 * @param string $text
	 * @return string
	 */
	protected function unstripType( $type, $text ) {
		// Shortcut
		if ( !count( $this->data[$type] ) ) {
			return $text;
		}

		wfProfileIn( __METHOD__ );
		// unstripCallback() re-enters this method for nested markers, so the
		// previous bucket name is saved here and restored afterwards.
		$oldType = $this->tempType;
		$this->tempType = $type;
		$text = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
		$this->tempType = $oldType;
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Regex callback for unstripType(): produce the replacement for one marker.
	 *
	 * @param array $m Match: [0] whole marker, [1] state ID, [2] item key
	 * @return string The item's content with nested markers expanded; an
	 *  error <span> if the item refers back to itself or nesting exceeds
	 *  UNSTRIP_RECURSION_LIMIT; or the marker unchanged if it is not ours
	 */
	protected function unstripCallback( $m ) {
		$marker = $m[2];
		if ( $m[1] !== $this->id || !isset( $this->data[$this->tempType][$marker] ) ) {
			// Another state's marker, or no item in this bucket: leave it alone
			return $m[0];
		}

		if ( isset( $this->circularRefGuard[$marker] ) ) {
			return '<span class="error">'
				. wfMessage( 'parser-unstrip-loop-warning' )->inContentLanguage()->text()
				. '</span>';
		}
		if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) {
			return '<span class="error">' .
				wfMessage( 'parser-unstrip-recursion-limit' )
					->numParams( self::UNSTRIP_RECURSION_LIMIT )->inContentLanguage()->text() .
				'</span>';
		}

		// Mark this key as in progress while its content is expanded
		$this->circularRefGuard[$marker] = true;
		$this->recursionLevel++;
		$ret = $this->unstripType( $this->tempType, $this->data[$this->tempType][$marker] );
		$this->recursionLevel--;
		unset( $this->circularRefGuard[$marker] );
		return $ret;
	}

	/**
	 * Get a StripState object holding only the items whose markers appear
	 * directly in the given text. Markers nested inside those items' content
	 * are not followed.
	 *
	 * @param string $text
	 *
	 * @return StripState
	 */
	public function getSubState( $text ) {
		$subState = new StripState( $this->id );

		// A single regex scan visits every well-formed marker once and always
		// moves forward, so malformed marker fragments and markers of other
		// states are skipped instead of stalling the search.
		if ( !preg_match_all( $this->regex, $text, $matches, PREG_SET_ORDER ) ) {
			return $subState;
		}

		foreach ( $matches as $m ) {
			if ( $m[1] !== $this->id ) {
				continue;
			}

			$key = $m[2];
			if ( isset( $this->data['nowiki'][$key] ) ) {
				$subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
			} elseif ( isset( $this->data['general'][$key] ) ) {
				$subState->data['general'][$key] = $this->data['general'][$key];
			}
		}
		return $subState;
	}

	/**
	 * Merge another StripState object into this one. The strip marker keys
	 * will not be preserved. The strings in the $texts array will have the
	 * other state's strip markers rewritten so that they refer to this
	 * object; the resulting array of strings will be returned.
	 *
	 * @param StripState $otherState
	 * @param array $texts
	 * @return array
	 */
	public function merge( $otherState, $texts ) {
		// A fresh prefix keeps the imported keys apart from existing ones
		$mergePrefix = Parser::getRandomString();

		foreach ( $otherState->data as $type => $items ) {
			foreach ( $items as $key => $value ) {
				$this->data[$type]["$mergePrefix-$key"] = $value;
			}
		}

		$this->tempMergePrefix = $mergePrefix;
		// The markers to rewrite carry the other state's ID, which need not
		// be the same as ours.
		$this->tempMergeId = $otherState->id;
		$texts = preg_replace_callback( $otherState->regex, array( $this, 'mergeCallback' ), $texts );
		$this->tempMergePrefix = null;
		$this->tempMergeId = null;
		return $texts;
	}

	/**
	 * Regex callback for merge(): rewrite one marker of the merged-in state
	 * into a marker of this state under the current merge prefix.
	 *
	 * @param array $m Match: [0] whole marker, [1] state ID, [2] item key
	 * @return string
	 */
	protected function mergeCallback( $m ) {
		if ( $m[1] === $this->tempMergeId ) {
			$key = $m[2];
			return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
		} else {
			return $m[0];
		}
	}

	/**
	 * Remove any strip markers carrying this object's ID from the given
	 * text. Markers of other states are kept.
	 *
	 * @param string $text Input string
	 * @return string
	 */
	public function killMarkers( $text ) {
		$id = $this->id; // PHP 5.3 hack
		return preg_replace_callback( $this->regex,
			function ( $m ) use ( $id ) {
				if ( $m[1] === $id ) {
					return '';
				} else {
					return $m[0];
				}
			},
			$text );
	}
}