Enable entity loader and handle errors nicely in WikiImporter constructor
author: This, that and the other <at.light@live.com.au>
Tue, 13 Jan 2015 05:57:07 +0000 (16:57 +1100)
committer: This, that and the other <at.light@live.com.au>
Sat, 11 Apr 2015 01:40:17 +0000 (11:40 +1000)
Two issues are addressed here:
* Show a slightly friendlier message (instead of a fatal error) if libxml is not present
* Make sure the entity loader is enabled when opening XML documents

Also provide an error message when XMLReader::open fails, as otherwise,
the user sees cryptic errors from code that tries to use the (unopened)
XMLReader.

Bug: T45868
Bug: T86036
Change-Id: Ibcccce9f09f87b17c3093fd0c3c3ff74d7dc6cb7

includes/Import.php

index 4dfe830..1e0f8e2 100644 (file)
@@ -49,8 +49,13 @@ class WikiImporter {
         * Creates an ImportXMLReader drawing from the source provided
         * @param ImportSource $source
         * @param Config $config
+        * @throws Exception
         */
        function __construct( ImportSource $source, Config $config = null ) {
+               if ( !class_exists( 'XMLReader' ) ) {
+                       throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
+               }
+
                $this->reader = new XMLReader();
                if ( !$config ) {
                        wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
@@ -62,11 +67,22 @@ class WikiImporter {
                        stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
                }
                $id = UploadSourceAdapter::registerSource( $source );
+
+               // Enable the entity loader, as it is needed for loading external URLs via
+               // XMLReader::open (T86036)
+               $oldDisable = libxml_disable_entity_loader( false );
                if ( defined( 'LIBXML_PARSEHUGE' ) ) {
-                       $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
+                       $status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
                } else {
-                       $this->reader->open( "uploadsource://$id" );
+                       $status = $this->reader->open( "uploadsource://$id" );
                }
+               if ( !$status ) {
+                       $error = libxml_get_last_error();
+                       libxml_disable_entity_loader( $oldDisable );
+                       throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
+                               $error->message );
+               }
+               libxml_disable_entity_loader( $oldDisable );
 
                // Default callbacks
                $this->setPageCallback( array( $this, 'beforeImportPage' ) );