Merge "Verify parameter for MapCacheLRU::has() can be passed to array_key_exists()"
[lhc/web/wiklou.git] / includes / libs / XmlTypeCheck.php
1 <?php
2 /**
3 * XML syntax and type checker.
4 *
5 * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
6 * more control over the expansion of XML entities. When passed to the
7 * callback, entities will be fully expanded, but the checker may report
8 * the XML as invalid if expanding the entities is likely to cause a DoS.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 * http://www.gnu.org/copyleft/gpl.html
24 *
25 * @file
26 */
27
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Note that this doesn't check schema validity.
	 */
	public $wellFormed = null;

	/**
	 * Will be set to true if the optional element filter (or the
	 * processing instruction handler) returned a match at some point.
	 */
	public $filterMatch = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 */
	public $rootElement = '';

	/**
	 * Optional element filter, invoked as ( $name, $attribs, $data ) every
	 * time an element is closed. Previously this property was created
	 * dynamically by the constructor (and was therefore publicly
	 * accessible), so it is kept public for backward compatibility.
	 *
	 * @var callable|null
	 */
	public $filterCallback = null;

	/**
	 * A stack of strings containing the data of each xml element as it's processed. Append
	 * data to the top string of the stack, then pop off the string and process it when the
	 * element is closed.
	 */
	protected $elementData = array();

	/**
	 * A stack of element names and attributes, as we process them.
	 */
	protected $elementDataContext = array();

	/**
	 * Current depth of the data stack.
	 */
	protected $stackDepth = 0;

	/**
	 * Additional parsing options
	 */
	private $parserOptions = array(
		'processing_instruction_handler' => '',
	);

	/**
	 * Parse and check the given input immediately; the results are exposed
	 * through $wellFormed, $filterMatch and $rootElement.
	 *
	 * @param string $input a filename or string containing the XML element
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks from the
	 *        element handler. It is called as ( $name, $attribs, $data ), which
	 *        gives you access to the element namespace, name, attributes, and
	 *        text contents.
	 *        Filter should return 'true' to toggle on $this->filterMatch
	 * @param bool $isFile (optional) indicates if the first parameter is a
	 *        filename (default, true) or if it is a string (false)
	 * @param array $options list of additional parsing options:
	 *        processing_instruction_handler: callable invoked as
	 *        ( $target, $data ) for every processing instruction; a truthy
	 *        return value toggles on $this->filterMatch
	 */
	public function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
		$this->filterCallback = $filterCallback;
		$this->parserOptions = array_merge( $this->parserOptions, $options );
		$this->validateFromInput( $input, $isFile );
	}

	/**
	 * Alternative constructor: from filename
	 *
	 * @param string $fname the filename of an XML document
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks from the
	 *        element handler. It is called as ( $name, $attribs, $data ), which
	 *        gives you access to the element namespace, name, attributes, and
	 *        text contents.
	 *        Filter should return 'true' to toggle on $this->filterMatch
	 * @return XmlTypeCheck
	 */
	public static function newFromFilename( $fname, $filterCallback = null ) {
		return new self( $fname, $filterCallback, true );
	}

	/**
	 * Alternative constructor: from string
	 *
	 * @param string $string a string containing an XML element
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks from the
	 *        element handler. It is called as ( $name, $attribs, $data ), which
	 *        gives you access to the element namespace, name, attributes, and
	 *        text contents.
	 *        Filter should return 'true' to toggle on $this->filterMatch
	 * @return XmlTypeCheck
	 */
	public static function newFromString( $string, $filterCallback = null ) {
		return new self( $string, $filterCallback, false );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement
	 *
	 * @return string
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Open the input with XMLReader and run the validation pass over it.
	 *
	 * @param string $xml a filename or a string of XML, depending on $isFile
	 * @param bool $isFile true if $xml is a filename, false if it is XML text
	 * @throws Exception rethrows any exception raised while validating
	 *         (for instance by a filter callback), after cleaning up
	 */
	private function validateFromInput( $xml, $isFile ) {
		if ( $xml === null || $xml === false || $xml === '' ) {
			// Nothing to parse. XMLReader rejects empty input anyway (with a
			// warning on older PHP versions, with a ValueError on PHP 8), so
			// report it as not well-formed without calling into the reader.
			$this->wellFormed = false;
			return;
		}

		$reader = new XMLReader();
		if ( $isFile ) {
			$s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}
		if ( $s !== true ) {
			// Couldn't open the XML
			$this->wellFormed = false;
			return;
		}

		// NOTE(review): libxml_disable_entity_loader() is deprecated as of
		// PHP 8.0, but entity substitution is switched on just below, so
		// dropping this call needs a deliberate replacement - confirm before
		// changing it.
		$oldDisable = libxml_disable_entity_loader( true );
		$reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$failure = null;
		try {
			$this->validate( $reader );
		} catch ( Exception $e ) {
			// Calling this malformed, because we didn't parse the whole
			// thing. Maybe just an external entity reference.
			$this->wellFormed = false;
			$failure = $e;
		}

		// Single cleanup path, shared by the success and the failure case
		$reader->close();
		libxml_disable_entity_loader( $oldDisable );

		if ( $failure !== null ) {
			throw $failure;
		}
	}

	/**
	 * Advance the reader by one node. While reading, PHP errors are routed
	 * to XmlErrorHandler() so that they mark the document as malformed.
	 *
	 * @param XMLReader $reader
	 * @return bool the return value of XMLReader::read()
	 */
	private function readNext( XMLReader $reader ) {
		set_error_handler( array( $this, 'XmlErrorHandler' ) );
		$ret = $reader->read();
		restore_error_handler();
		return $ret;
	}

	/**
	 * Error handler installed around XMLReader::read(); any error reported
	 * during a read marks the document as not well-formed. Public only
	 * because it is registered through set_error_handler().
	 *
	 * @param int $errno
	 * @param string $errstr
	 */
	public function XmlErrorHandler( $errno, $errstr ) {
		$this->wellFormed = false;
	}

	/**
	 * Walk over the whole document, feeding elements, text and processing
	 * instructions to the handlers, and set $wellFormed at the end.
	 *
	 * @param XMLReader $reader an opened reader
	 */
	private function validate( $reader ) {
		// First, move through anything that isn't an element, and
		// handle any processing instructions with the callback
		do {
			if ( !$this->readNext( $reader ) ) {
				// Hit the end of the document before any elements
				$this->wellFormed = false;
				return;
			}
			if ( $reader->nodeType === XMLReader::PI ) {
				$this->processingInstructionHandler( $reader->name, $reader->value );
			}
		} while ( $reader->nodeType !== XMLReader::ELEMENT );

		// Process the rest of the document
		do {
			switch ( $reader->nodeType ) {
				case XMLReader::ELEMENT:
					$name = $this->expandNS(
						$reader->name,
						$reader->namespaceURI
					);
					if ( $this->rootElement === '' ) {
						$this->rootElement = $name;
					}
					// Read this before collecting the attributes, which moves
					// the reader off the element node and onto its attributes
					$empty = $reader->isEmptyElement;
					$attrs = $this->getAttributesArray( $reader );
					$this->elementOpen( $name, $attrs );
					if ( $empty ) {
						// <foo/> produces no END_ELEMENT node, close it now
						$this->elementClose();
					}
					break;

				case XMLReader::END_ELEMENT:
					$this->elementClose();
					break;

				case XMLReader::WHITESPACE:
				case XMLReader::SIGNIFICANT_WHITESPACE:
				case XMLReader::CDATA:
				case XMLReader::TEXT:
					$this->elementData( $reader->value );
					break;

				case XMLReader::ENTITY_REF:
					// Unexpanded entity (maybe external?),
					// don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::COMMENT:
					// Don't send to the filter (xml_parse didn't)
					break;

				case XMLReader::PI:
					// Processing instructions can happen after the header too
					$this->processingInstructionHandler(
						$reader->name,
						$reader->value
					);
					break;

				default:
					// One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
					// NOTATION, or XML_DECLARATION
					// xml_parse didn't send these to the filter, so we won't.
			}
		} while ( $this->readNext( $reader ) );

		if ( $this->stackDepth !== 0 ) {
			// Some element was never closed
			$this->wellFormed = false;
		} elseif ( $this->wellFormed === null ) {
			// No read error was recorded by XmlErrorHandler()
			$this->wellFormed = true;
		}
	}

	/**
	 * Get all of the attributes for an XMLReader's current node
	 *
	 * @param XMLReader $r
	 * @return array of attributes, keyed by their (namespace-expanded) name
	 */
	private function getAttributesArray( XMLReader $r ) {
		$attrs = array();
		while ( $r->moveToNextAttribute() ) {
			if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
				// XMLReader treats xmlns attributes as normal
				// attributes, while xml_parse doesn't
				continue;
			}
			$name = $this->expandNS( $r->name, $r->namespaceURI );
			$attrs[$name] = $r->value;
		}
		return $attrs;
	}

	/**
	 * Replace the prefix of a name (if any) by its namespace URI.
	 *
	 * @param string $name element or attribute name, maybe with a full or short prefix
	 * @param string $namespaceURI the namespaceURI
	 * @return string the name prefixed with namespaceURI, or $name unchanged
	 *         when there is no namespace
	 */
	private function expandNS( $name, $namespaceURI ) {
		if ( $namespaceURI ) {
			// Only the part after the last colon is the local name
			$parts = explode( ':', $name );
			$localname = array_pop( $parts );
			return "$namespaceURI:$localname";
		}
		return $name;
	}

	/**
	 * Push a newly opened element onto the stacks.
	 *
	 * @param string $name
	 * @param array $attribs
	 */
	private function elementOpen( $name, $attribs ) {
		$this->elementDataContext[] = array( $name, $attribs );
		$this->elementData[] = '';
		$this->stackDepth++;
	}

	/**
	 * Pop the innermost open element off the stacks and hand its name,
	 * attributes and collected text to the filter callback, if there is one.
	 */
	private function elementClose() {
		list( $name, $attribs ) = array_pop( $this->elementDataContext );
		$data = array_pop( $this->elementData );
		$this->stackDepth--;

		if ( is_callable( $this->filterCallback )
			&& call_user_func(
				$this->filterCallback,
				$name,
				$attribs,
				$data
			)
		) {
			// Filter hit
			$this->filterMatch = true;
		}
	}

	/**
	 * Append (trimmed) text to the data of the innermost open element.
	 *
	 * @param string $data
	 */
	private function elementData( $data ) {
		if ( $this->stackDepth < 1 ) {
			// Text outside of any element has nowhere to go; without this
			// guard it would be written to the non-existent index -1.
			return;
		}
		// Collect any data here, and we'll run the callback in elementClose
		$this->elementData[$this->stackDepth - 1] .= trim( $data );
	}

	/**
	 * Pass a processing instruction to the configured handler, if any.
	 *
	 * @param string $target
	 * @param string $data
	 */
	private function processingInstructionHandler( $target, $data ) {
		if ( $this->parserOptions['processing_instruction_handler'] ) {
			if ( call_user_func(
				$this->parserOptions['processing_instruction_handler'],
				$target,
				$data
			) ) {
				// Filter hit!
				$this->filterMatch = true;
			}
		}
	}
}