Merge "Don't fallback from uk to ru"
[lhc/web/wiklou.git] / includes / libs / mime / XmlTypeCheck.php
1 <?php
2 /**
3 * XML syntax and type checker.
4 *
5 * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
6 * more control over the expansion of XML entities. When passed to the
7 * callback, entities will be fully expanded, but may report the XML is
8 * invalid if expanding the entities is likely to cause a DoS.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 * http://www.gnu.org/copyleft/gpl.html
24 *
25 * @file
26 */
27
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Note that this doesn't check schema validity.
	 * Stays null until validation has produced a verdict.
	 * @var bool|null
	 */
	public $wellFormed = null;

	/**
	 * Will be set to true if the optional element filter (or the optional
	 * processing instruction handler) returned a match at some point.
	 * @var bool
	 */
	public $filterMatch = false;

	/**
	 * Will contain the type of filter hit if the optional element filter returned
	 * a match at some point. This is the truthy value returned by the callback.
	 * @var mixed
	 */
	public $filterMatchType = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 * @var string
	 */
	public $rootElement = '';

	/**
	 * Optional element filter passed to the constructor. It is called as
	 * ( $name, $attribs, $data ) each time an element is closed.
	 *
	 * Declared explicitly so that the constructor does not create a dynamic
	 * property; kept public because that is the visibility the dynamically
	 * created property had.
	 * @var callable|null
	 */
	public $filterCallback = null;

	/**
	 * A stack of strings containing the data of each xml element as it's processed. Append
	 * data to the top string of the stack, then pop off the string and process it when the
	 * element is closed.
	 * @var string[]
	 */
	protected $elementData = [];

	/**
	 * A stack of element names and attributes, as we process them.
	 * Each entry is a [ name, attributes ] pair.
	 * @var array[]
	 */
	protected $elementDataContext = [];

	/**
	 * Current depth of the data stack.
	 * @var int
	 */
	protected $stackDepth = 0;

	/**
	 * Additional parsing options
	 * @var array
	 */
	private $parserOptions = [
		'processing_instruction_handler' => '',
	];

	/**
	 * Parse and check the given input immediately; the outcome is available
	 * afterwards in $wellFormed, $filterMatch, $filterMatchType and $rootElement.
	 *
	 * @param string $input a filename or string containing the XML element
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks each
	 *        time an element is closed. It is called with the expanded element
	 *        name, the array of attributes, and the trimmed text contents.
	 *        A truthy return value toggles on $this->filterMatch and is stored
	 *        in $this->filterMatchType.
	 * @param bool $isFile (optional) indicates if the first parameter is a
	 *        filename (default, true) or if it is a string (false)
	 * @param array $options list of additional parsing options:
	 *        processing_instruction_handler: callable invoked with the target and
	 *        the data of every processing instruction; a truthy return value is
	 *        recorded in the same way as a match of the element filter.
	 */
	public function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
		$this->filterCallback = $filterCallback;
		$this->parserOptions = array_merge( $this->parserOptions, $options );
		$this->validateFromInput( $input, $isFile );
	}

	/**
	 * Alternative constructor: from filename
	 *
	 * @param string $fname the filename of an XML document
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks each
	 *        time an element is closed. It is called with the expanded element
	 *        name, the array of attributes, and the trimmed text contents.
	 *        A truthy return value toggles on $this->filterMatch.
	 * @return XmlTypeCheck
	 */
	public static function newFromFilename( $fname, $filterCallback = null ) {
		return new self( $fname, $filterCallback, true );
	}

	/**
	 * Alternative constructor: from string
	 *
	 * @param string $string a string containing an XML element
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks each
	 *        time an element is closed. It is called with the expanded element
	 *        name, the array of attributes, and the trimmed text contents.
	 *        A truthy return value toggles on $this->filterMatch.
	 * @return XmlTypeCheck
	 */
	public static function newFromString( $string, $filterCallback = null ) {
		return new self( $string, $filterCallback, false );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement
	 *
	 * @return string
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Open the input with XMLReader and run the validation over it.
	 *
	 * @param string $xml a filename or a string of XML, depending on $isFile
	 * @param bool $isFile true if $xml is a filename, false if it is XML source
	 * @throws Exception anything thrown while validating (for instance by a
	 *         callback) is re-thrown after $wellFormed has been set to false
	 */
	private function validateFromInput( $xml, $isFile ) {
		$reader = new XMLReader();
		if ( $isFile ) {
			$s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}
		if ( $s !== true ) {
			// Couldn't open the XML
			$this->wellFormed = false;
			return;
		}

		// Entity substitution is switched on below, so keep external
		// entities from being loaded while this document is read.
		$oldDisable = libxml_disable_entity_loader( true );
		try {
			$reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
			$this->validate( $reader );
		} catch ( Exception $e ) {
			// Calling this malformed, because we didn't parse the whole
			// thing. Maybe just an external entity reference.
			$this->wellFormed = false;
			throw $e;
		} finally {
			// Runs on every exit path, so the reader is always closed and the
			// process-wide entity loader setting is always put back.
			$reader->close();
			libxml_disable_entity_loader( $oldDisable );
		}
	}

	/**
	 * Advance the reader to the next node. Any PHP error raised by the read
	 * is routed to XmlErrorHandler() instead of the regular error handler.
	 *
	 * @param XMLReader $reader
	 * @return bool the return value of XMLReader::read()
	 */
	private function readNext( XMLReader $reader ) {
		set_error_handler( [ $this, 'XmlErrorHandler' ] );
		try {
			return $reader->read();
		} finally {
			// Never leave our handler installed, even if read() throws
			restore_error_handler();
		}
	}

	/**
	 * Error handler installed by readNext() around XMLReader::read().
	 * An error while reading means the document is not well-formed.
	 *
	 * @param int $errno
	 * @param string $errstr
	 */
	public function XmlErrorHandler( $errno, $errstr ) {
		$this->wellFormed = false;
	}

	/**
	 * Walk over the whole document, feeding elements, character data and
	 * processing instructions to the handlers, and set $wellFormed.
	 *
	 * @param XMLReader $reader an opened reader positioned before the first node
	 */
	private function validate( XMLReader $reader ) {
		// First, move through anything that isn't an element, and
		// handle any processing instructions with the callback
		do {
			if ( !$this->readNext( $reader ) ) {
				// Hit the end of the document before any elements
				$this->wellFormed = false;
				return;
			}
			if ( $reader->nodeType === XMLReader::PI ) {
				$this->processingInstructionHandler( $reader->name, $reader->value );
			}
		} while ( $reader->nodeType !== XMLReader::ELEMENT );

		// Process the rest of the document, starting with the root element
		// the reader is currently positioned on
		do {
			switch ( $reader->nodeType ) {
				case XMLReader::ELEMENT:
					$name = $this->expandNS(
						$reader->name,
						$reader->namespaceURI
					);
					if ( $this->rootElement === '' ) {
						$this->rootElement = $name;
					}
					// Must be read before the attributes are walked, since
					// that moves the reader off the element node
					$empty = $reader->isEmptyElement;
					$attrs = $this->getAttributesArray( $reader );
					$this->elementOpen( $name, $attrs );
					if ( $empty ) {
						// <foo/> produces no END_ELEMENT node, close it now
						$this->elementClose();
					}
					break;

				case XMLReader::END_ELEMENT:
					$this->elementClose();
					break;

				case XMLReader::WHITESPACE:
				case XMLReader::SIGNIFICANT_WHITESPACE:
				case XMLReader::CDATA:
				case XMLReader::TEXT:
					$this->elementData( $reader->value );
					break;

				case XMLReader::ENTITY_REF:
					// Unexpanded entity (maybe external?),
					// don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::COMMENT:
					// Don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::PI:
					// Processing instructions can happen after the header too
					$this->processingInstructionHandler(
						$reader->name,
						$reader->value
					);
					break;

				default:
					// One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
					// NOTATION, or XML_DECLARATION
					// xml_parse didn't send these to the filter, so we won't.
			}
		} while ( $this->readNext( $reader ) );

		if ( $this->stackDepth !== 0 ) {
			// Reading stopped while elements were still open
			$this->wellFormed = false;
		} elseif ( $this->wellFormed === null ) {
			// No error was reported along the way
			$this->wellFormed = true;
		}
	}

	/**
	 * Get all of the attributes for an XMLReader's current node
	 *
	 * @param XMLReader $r
	 * @return array of attributes, keyed by expanded attribute name
	 */
	private function getAttributesArray( XMLReader $r ) {
		$attrs = [];
		while ( $r->moveToNextAttribute() ) {
			if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
				// XMLReader treats xmlns attributes as normal
				// attributes, while xml_parse doesn't
				continue;
			}
			$name = $this->expandNS( $r->name, $r->namespaceURI );
			$attrs[$name] = $r->value;
		}
		return $attrs;
	}

	/**
	 * Replace the prefix of a qualified name with its namespace URI.
	 *
	 * @param string $name element or attribute name, maybe with a full or short prefix
	 * @param string $namespaceURI the namespaceURI
	 * @return string the name prefixed with namespaceURI, or $name unchanged
	 *         when $namespaceURI is empty
	 */
	private function expandNS( $name, $namespaceURI ) {
		if ( $namespaceURI ) {
			// Keep only what follows the last colon
			$parts = explode( ':', $name );
			$localname = array_pop( $parts );
			return "$namespaceURI:$localname";
		}
		return $name;
	}

	/**
	 * Push a newly opened element onto the stacks.
	 *
	 * @param string $name expanded element name
	 * @param array $attribs attributes of the element
	 */
	private function elementOpen( $name, $attribs ) {
		$this->elementDataContext[] = [ $name, $attribs ];
		$this->elementData[] = '';
		$this->stackDepth++;
	}

	/**
	 * Pop the innermost open element off the stacks and hand its name,
	 * attributes and collected text to the filter callback, if there is one.
	 */
	private function elementClose() {
		list( $name, $attribs ) = array_pop( $this->elementDataContext );
		$data = array_pop( $this->elementData );
		$this->stackDepth--;
		$callbackReturn = false;

		if ( is_callable( $this->filterCallback ) ) {
			$callbackReturn = call_user_func(
				$this->filterCallback,
				$name,
				$attribs,
				$data
			);
		}
		if ( $callbackReturn ) {
			// Filter hit!
			$this->filterMatch = true;
			$this->filterMatchType = $callbackReturn;
		}
	}

	/**
	 * Append trimmed character data to the innermost open element.
	 *
	 * @param string $data
	 */
	private function elementData( $data ) {
		if ( $this->stackDepth < 1 ) {
			// No element is open, so there is nothing to attach the data to
			return;
		}
		// Collect any data here, and we'll run the callback in elementClose
		$this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
	}

	/**
	 * Pass a processing instruction to the 'processing_instruction_handler'
	 * option, if one was given, and record a truthy return value as a match.
	 *
	 * @param string $target
	 * @param string $data
	 */
	private function processingInstructionHandler( $target, $data ) {
		$callbackReturn = false;
		if ( $this->parserOptions['processing_instruction_handler'] ) {
			$callbackReturn = call_user_func(
				$this->parserOptions['processing_instruction_handler'],
				$target,
				$data
			);
		}
		if ( $callbackReturn ) {
			// Filter hit!
			$this->filterMatch = true;
			$this->filterMatchType = $callbackReturn;
		}
	}
}