Clean up state of libxml on failed import.
authordaniel <daniel.kinzler@wikimedia.de>
Wed, 11 Feb 2015 11:06:25 +0000 (12:06 +0100)
committerDaniel Kinzler <daniel.kinzler@wikimedia.de>
Wed, 11 Feb 2015 11:27:21 +0000 (11:27 +0000)
Make sure we always call XMLReader::close() to clean up libxml's internal state,
even if import fails with an exception. Otherwise, any subsequent attempt at importing
(or otherwise using an XMLReader) will fail with:

  XMLReader::open(): Unable to open source data

This is particularly annoying for unit tests which should be allowed to fail
without dragging down subsequent tests. Even more importantly, they may
explicitly be testing a failure case, which should not cause subsequent tests
to fail.

NOTE: Wikibase patch Id035ecebebb67 is blocked on this,
please re-check once this is merged.

Change-Id: I31c014df39aa11c11ded70050ef12a8e2c5fefc5

includes/Import.php

index 36028ea..7eff5da 100644 (file)
@@ -497,36 +497,48 @@ class WikiImporter {
 
                $keepReading = $this->reader->read();
                $skip = false;
-               while ( $keepReading ) {
-                       $tag = $this->reader->name;
-                       $type = $this->reader->nodeType;
-
-                       if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
-                               // Do nothing
-                       } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
-                               break;
-                       } elseif ( $tag == 'siteinfo' ) {
-                               $this->handleSiteInfo();
-                       } elseif ( $tag == 'page' ) {
-                               $this->handlePage();
-                       } elseif ( $tag == 'logitem' ) {
-                               $this->handleLogItem();
-                       } elseif ( $tag != '#text' ) {
-                               $this->warn( "Unhandled top-level XML tag $tag" );
-
-                               $skip = true;
-                       }
+               $rethrow = null;
+               try {
+                       while ( $keepReading ) {
+                               $tag = $this->reader->name;
+                               $type = $this->reader->nodeType;
+
+                               if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+                                       // Do nothing
+                               } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
+                                       break;
+                               } elseif ( $tag == 'siteinfo' ) {
+                                       $this->handleSiteInfo();
+                               } elseif ( $tag == 'page' ) {
+                                       $this->handlePage();
+                               } elseif ( $tag == 'logitem' ) {
+                                       $this->handleLogItem();
+                               } elseif ( $tag != '#text' ) {
+                                       $this->warn( "Unhandled top-level XML tag $tag" );
+
+                                       $skip = true;
+                               }
 
-                       if ( $skip ) {
-                               $keepReading = $this->reader->next();
-                               $skip = false;
-                               $this->debug( "Skip" );
-                       } else {
-                               $keepReading = $this->reader->read();
+                               if ( $skip ) {
+                                       $keepReading = $this->reader->next();
+                                       $skip = false;
+                                       $this->debug( "Skip" );
+                               } else {
+                                       $keepReading = $this->reader->read();
+                               }
                        }
+               } catch ( Exception $ex ) {
+                       $rethrow = $ex;
                }
 
+               // finally
                libxml_disable_entity_loader( $oldDisable );
+               $this->reader->close();
+
+               if ( $rethrow ) {
+                       throw $rethrow;
+               }
+
                return true;
        }