Added a separate error message for mkdir failures
[lhc/web/wiklou.git] / includes / libs / XmlTypeCheck.php
index 0d6c3a6..34afb68 100644 (file)
@@ -2,6 +2,11 @@
 /**
  * XML syntax and type checker.
  *
+ * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
+ * more control over the expansion of XML entities. When passed to the
+ * callback, entities will be fully expanded, but may report the XML is
+ * invalid if expanding the entities are likely to cause a DoS.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -25,7 +30,7 @@ class XmlTypeCheck {
         * Will be set to true or false to indicate whether the file is
         * well-formed XML. Note that this doesn't check schema validity.
         */
-       public $wellFormed = false;
+       public $wellFormed = null;
 
        /**
         * Will be set to true if the optional element filter returned
@@ -33,6 +38,13 @@ class XmlTypeCheck {
         */
        public $filterMatch = false;
 
+       /**
+        * Will contain the type of filter hit if the optional element filter returned
+        * a match at some point.
+        * @var mixed
+        */
+       public $filterMatchType = false;
+
        /**
         * Name of the document's root element, including any namespace
         * as an expanded URL.
@@ -78,12 +90,7 @@ class XmlTypeCheck {
        function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
                $this->filterCallback = $filterCallback;
                $this->parserOptions = array_merge( $this->parserOptions, $options );
-
-               if ( $isFile ) {
-                       $this->validateFromFile( $input );
-               } else {
-                       $this->validateFromString( $input );
-               }
+               $this->validateFromInput( $input, $isFile );
        }
 
        /**
@@ -125,140 +132,217 @@ class XmlTypeCheck {
                return $this->rootElement;
        }
 
+
        /**
-        * Get an XML parser with the root element handler.
-        * @see XmlTypeCheck::rootElementOpen()
-        * @return resource a resource handle for the XML parser
+        * @param string $fname the filename
         */
-       private function getParser() {
-               $parser = xml_parser_create_ns( 'UTF-8' );
-               // case folding violates XML standard, turn it off
-               xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
-               xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
-               if ( $this->parserOptions['processing_instruction_handler'] ) {
-                       xml_set_processing_instruction_handler(
-                               $parser,
-                               array( $this, 'processingInstructionHandler' )
-                       );
+       private function validateFromInput( $xml, $isFile ) {
+               $reader = new XMLReader();
+               if ( $isFile ) {
+                       $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+               } else {
+                       $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+               }
+               if ( $s !== true ) {
+                       // Couldn't open the XML
+                       $this->wellFormed = false;
+               } else {
+                       $oldDisable = libxml_disable_entity_loader( true );
+                       $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
+                       try {
+                               $this->validate( $reader );
+                       } catch ( Exception $e ) {
+                               // Calling this malformed, because we didn't parse the whole
+                               // thing. Maybe just an external entity refernce.
+                               $this->wellFormed = false;
+                               $reader->close();
+                               libxml_disable_entity_loader( $oldDisable );
+                               throw $e;
+                       }
+                       $reader->close();
+                       libxml_disable_entity_loader( $oldDisable );
                }
-               return $parser;
        }
 
-       /**
-        * @param string $fname the filename
-        */
-       private function validateFromFile( $fname ) {
-               $parser = $this->getParser();
-
-               if ( file_exists( $fname ) ) {
-                       $file = fopen( $fname, "rb" );
-                       if ( $file ) {
-                               do {
-                                       $chunk = fread( $file, 32768 );
-                                       $ret = xml_parse( $parser, $chunk, feof( $file ) );
-                                       if ( $ret == 0 ) {
-                                               $this->wellFormed = false;
-                                               fclose( $file );
-                                               xml_parser_free( $parser );
-                                               return;
+       private function readNext( XMLReader $reader ) {
+               set_error_handler( array( $this, 'XmlErrorHandler' ) );
+               $ret = $reader->read();
+               restore_error_handler();
+               return $ret;
+       }
+
+       public function XmlErrorHandler( $errno, $errstr ) {
+               $this->wellFormed = false;
+       }
+
+       private function validate( $reader ) {
+
+               // First, move through anything that isn't an element, and
+               // handle any processing instructions with the callback
+               do {
+                       if ( !$this->readNext( $reader ) ) {
+                               // Hit the end of the document before any elements
+                               $this->wellFormed = false;
+                               return;
+                       }
+                       if ( $reader->nodeType === XMLReader::PI ) {
+                               $this->processingInstructionHandler( $reader->name, $reader->value );
+                       }
+               } while ( $reader->nodeType != XMLReader::ELEMENT );
+
+               // Process the rest of the document
+               do {
+                       switch ( $reader->nodeType ) {
+                               case XMLReader::ELEMENT:
+                                       $name = $this->expandNS(
+                                               $reader->name,
+                                               $reader->namespaceURI
+                                       );
+                                       if ( $this->rootElement === '' ) {
+                                               $this->rootElement = $name;
+                                       }
+                                       $empty = $reader->isEmptyElement;
+                                       $attrs = $this->getAttributesArray( $reader );
+                                       $this->elementOpen( $name, $attrs );
+                                       if ( $empty ) {
+                                               $this->elementClose();
                                        }
-                               } while ( !feof( $file ) );
+                                       break;
+
+                               case XMLReader::END_ELEMENT:
+                                       $this->elementClose();
+                                       break;
+
+                               case XMLReader::WHITESPACE:
+                               case XMLReader::SIGNIFICANT_WHITESPACE:
+                               case XMLReader::CDATA:
+                               case XMLReader::TEXT:
+                                       $this->elementData( $reader->value );
+                                       break;
+
+                               case XMLReader::ENTITY_REF:
+                                       // Unexpanded entity (maybe external?),
+                                       // don't send to the filter (xml_parse didn't)
+                                       break;
 
-                               fclose( $file );
+                               case XMLReader::COMMENT:
+                                       // Don't send to the filter (xml_parse didn't)
+                                       break;
+
+                               case XMLReader::PI:
+                                       // Processing instructions can happen after the header too
+                                       $this->processingInstructionHandler(
+                                               $reader->name,
+                                               $reader->value
+                                       );
+                                       break;
+                               default:
+                                       // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
+                                       // NOTATION, or XML_DECLARATION
+                                       // xml_parse didn't send these to the filter, so we won't.
                        }
+
+               } while ( $this->readNext( $reader ) );
+
+               if ( $this->stackDepth !== 0 ) {
+                       $this->wellFormed = false;
+               } elseif ( $this->wellFormed === null ) {
+                       $this->wellFormed = true;
                }
-               $this->wellFormed = true;
 
-               xml_parser_free( $parser );
        }
 
        /**
-        *
-        * @param string $string the XML-input-string to be checked.
+        * Get all of the attributes for an XMLReader's current node
+        * @param $r XMLReader
+        * @return array of attributes
         */
-       private function validateFromString( $string ) {
-               $parser = $this->getParser();
-               $ret = xml_parse( $parser, $string, true );
-               xml_parser_free( $parser );
-               if ( $ret == 0 ) {
-                       $this->wellFormed = false;
-                       return;
+       private function getAttributesArray( XMLReader $r ) {
+               $attrs = array();
+               while ( $r->moveToNextAttribute() ) {
+                       if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
+                               // XMLReader treats xmlns attributes as normal
+                               // attributes, while xml_parse doesn't
+                               continue;
+                       }
+                       $name = $this->expandNS( $r->name, $r->namespaceURI );
+                       $attrs[$name] = $r->value;
                }
-               $this->wellFormed = true;
+               return $attrs;
        }
 
        /**
-        * @param $parser
-        * @param $name
-        * @param $attribs
+        * @param $name element or attribute name, maybe with a full or short prefix
+        * @param $namespaceURI the namespaceURI
+        * @return string the name prefixed with namespaceURI
         */
-       private function rootElementOpen( $parser, $name, $attribs ) {
-               $this->rootElement = $name;
-
-               if ( is_callable( $this->filterCallback ) ) {
-                       xml_set_element_handler(
-                               $parser,
-                               array( $this, 'elementOpen' ),
-                               array( $this, 'elementClose' )
-                       );
-                       xml_set_character_data_handler( $parser, array( $this, 'elementData' ) );
-                       $this->elementOpen( $parser, $name, $attribs );
-               } else {
-                       // We only need the first open element
-                       xml_set_element_handler( $parser, false, false );
+       private function expandNS( $name, $namespaceURI ) {
+               if ( $namespaceURI ) {
+                       $parts = explode( ':', $name );
+                       $localname = array_pop( $parts );
+                       return "$namespaceURI:$localname";
                }
+               return $name;
        }
 
        /**
-        * @param $parser
         * @param $name
         * @param $attribs
         */
-       private function elementOpen( $parser, $name, $attribs ) {
+       private function elementOpen( $name, $attribs ) {
                $this->elementDataContext[] = array( $name, $attribs );
                $this->elementData[] = '';
                $this->stackDepth++;
        }
 
        /**
-        * @param $parser
-        * @param $name
         */
-       private function elementClose( $parser, $name ) {
+       private function elementClose() {
                list( $name, $attribs ) = array_pop( $this->elementDataContext );
                $data = array_pop( $this->elementData );
                $this->stackDepth--;
+               $callbackReturn = false;
 
-               if ( call_user_func(
-                       $this->filterCallback,
-                       $name,
-                       $attribs,
-                       $data
-               ) ) {
+               if ( is_callable( $this->filterCallback ) ) {
+                       $callbackReturn = call_user_func(
+                               $this->filterCallback,
+                               $name,
+                               $attribs,
+                               $data
+                       );
+               }
+               if ( $callbackReturn ) {
                        // Filter hit!
                        $this->filterMatch = true;
+                       $this->filterMatchType = $callbackReturn;
                }
        }
 
        /**
-        * @param $parser
         * @param $data
         */
-       private function elementData( $parser, $data ) {
-               // xml_set_character_data_handler breaks the data on & characters, so
-               // we collect any data here, and we'll run the callback in elementClose
+       private function elementData( $data ) {
+               // Collect any data here, and we'll run the callback in elementClose
                $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
        }
 
        /**
-        * @param $parser
         * @param $target
         * @param $data
         */
-       private function processingInstructionHandler( $parser, $target, $data ) {
-               if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) {
+       private function processingInstructionHandler( $target, $data ) {
+               $callbackReturn = false;
+               if ( $this->parserOptions['processing_instruction_handler'] ) {
+                       $callbackReturn = call_user_func(
+                               $this->parserOptions['processing_instruction_handler'],
+                               $target,
+                               $data
+                       );
+               }
+               if ( $callbackReturn ) {
                        // Filter hit!
                        $this->filterMatch = true;
+                       $this->filterMatchType = $callbackReturn;
                }
        }
 }