Add null check in WikiImporter
[lhc/web/wiklou.git] / includes / Import.php
index 36028ea..06355ed 100644 (file)
@@ -42,6 +42,8 @@ class WikiImporter {
        private $config;
        /** @var ImportTitleFactory */
        private $importTitleFactory;
+       /** @var array */
+       private $countableCache = array();
 
        /**
         * Creates an ImportXMLReader drawing from the source provided
@@ -67,6 +69,7 @@ class WikiImporter {
                }
 
                // Default callbacks
+               $this->setPageCallback( array( $this, 'beforeImportPage' ) );
                $this->setRevisionCallback( array( $this, "importRevision" ) );
                $this->setUploadCallback( array( $this, 'importUpload' ) );
                $this->setLogItemCallback( array( $this, 'importLogItem' ) );
@@ -288,6 +291,19 @@ class WikiImporter {
                $this->mImportUploads = $import;
        }
 
+       /**
+        * Default per-page callback. Caches whether the page is countable, for finishImportPage()
+        * @param array $titleAndForeignTitle Two-element array, with Title object at
+        * index 0 and ForeignTitle object at index 1 (only index 0 is read here)
+        * @return bool Always true
+        */
+       public function beforeImportPage( $titleAndForeignTitle ) {
+               $title = $titleAndForeignTitle[0];
+               $page = WikiPage::factory( $title );
+               $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
+               return true;
+       }
+
        /**
         * Default per-revision callback, performs the import.
         * @param WikiRevision $revision
@@ -349,6 +365,31 @@ class WikiImporter {
         */
        public function finishImportPage( $title, $foreignTitle, $revCount,
                        $sRevCount, $pageInfo ) {
+
+               // Update article count statistics (T42009)
+               // WikiPage->doEditUpdates() counts one revision at a time and suffers from
+               // slave lag during bulk imports, so WikiPage only handles the total page and
+               // revision counts. The article (content page) count is adjusted here instead:
+               // the page's current countable state is compared with the value cached by
+               // beforeImportPage(), and a SiteStatsUpdate is queued only if they differ.
+               $page = WikiPage::factory( $title );
+               $page->loadPageData( 'fromdbmaster' );
+               $content = $page->getContent();
+               if ( $content === null ) {
+                       wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
+                               ' because WikiPage::getContent() returned null' );
+               } else {
+                       $editInfo = $page->prepareContentForEdit( $content );
+                       $countKey = 'title_' . $title->getPrefixedText();
+                       $countable = $page->isCountable( $editInfo );
+                       if ( array_key_exists( $countKey, $this->countableCache ) &&
+                               $countable != $this->countableCache[ $countKey ] ) {
+                               DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
+                                       'articles' => ( (int)$countable - (int)$this->countableCache[ $countKey ] )
+                               ) ) );
+                       }
+               }
+
                $args = func_get_args();
                return Hooks::run( 'AfterImportPage', $args );
        }
@@ -441,7 +482,8 @@ class WikiImporter {
        /**
         * Retrieves the contents of the named attribute of the current element.
         * @param string $attr The name of the attribute
-        * @return string The value of the attribute or an empty string if it is not set in the current element.
+        * @return string The value of the attribute or an empty string if it is not set in the current
+        * element.
         */
        public function nodeAttribute( $attr ) {
                return $this->reader->getAttribute( $attr );
@@ -497,36 +539,48 @@ class WikiImporter {
 
                $keepReading = $this->reader->read();
                $skip = false;
-               while ( $keepReading ) {
-                       $tag = $this->reader->name;
-                       $type = $this->reader->nodeType;
-
-                       if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
-                               // Do nothing
-                       } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
-                               break;
-                       } elseif ( $tag == 'siteinfo' ) {
-                               $this->handleSiteInfo();
-                       } elseif ( $tag == 'page' ) {
-                               $this->handlePage();
-                       } elseif ( $tag == 'logitem' ) {
-                               $this->handleLogItem();
-                       } elseif ( $tag != '#text' ) {
-                               $this->warn( "Unhandled top-level XML tag $tag" );
-
-                               $skip = true;
-                       }
+               $rethrow = null;
+               try {
+                       while ( $keepReading ) {
+                               $tag = $this->reader->name;
+                               $type = $this->reader->nodeType;
+
+                               if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+                                       // Do nothing
+                               } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
+                                       break;
+                               } elseif ( $tag == 'siteinfo' ) {
+                                       $this->handleSiteInfo();
+                               } elseif ( $tag == 'page' ) {
+                                       $this->handlePage();
+                               } elseif ( $tag == 'logitem' ) {
+                                       $this->handleLogItem();
+                               } elseif ( $tag != '#text' ) {
+                                       $this->warn( "Unhandled top-level XML tag $tag" );
+
+                                       $skip = true;
+                               }
 
-                       if ( $skip ) {
-                               $keepReading = $this->reader->next();
-                               $skip = false;
-                               $this->debug( "Skip" );
-                       } else {
-                               $keepReading = $this->reader->read();
+                               if ( $skip ) {
+                                       $keepReading = $this->reader->next();
+                                       $skip = false;
+                                       $this->debug( "Skip" );
+                               } else {
+                                       $keepReading = $this->reader->read();
+                               }
                        }
+               } catch ( Exception $ex ) {
+                       $rethrow = $ex;
                }
 
+               // Emulated "finally": always restore the entity loader and close the reader
                libxml_disable_entity_loader( $oldDisable );
+               $this->reader->close();
+
+               if ( $rethrow ) {
+                       throw $rethrow;
+               }
+
                return true;
        }
 
@@ -1532,7 +1586,6 @@ class WikiRevision {
                                        $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                                return false;
                        }
-                       $oldcountable = $page->isCountable();
                }
 
                # @todo FIXME: Use original rev_id optionally (better for backups)
@@ -1555,10 +1608,11 @@ class WikiRevision {
 
                if ( $changed !== false && !$this->mNoUpdates ) {
                        wfDebug( __METHOD__ . ": running updates\n" );
+                       // countable/oldcountable stuff is handled in WikiImporter::finishImportPage
                        $page->doEditUpdates(
                                $revision,
                                $userObj,
-                               array( 'created' => $created, 'oldcountable' => $oldcountable )
+                               array( 'created' => $created, 'oldcountable' => 'no-change' )
                        );
                }
 
@@ -1621,6 +1675,7 @@ class WikiRevision {
                                RepoGroup::singleton()->getLocalRepo(), $archiveName );
                } else {
                        $file = wfLocalFile( $this->getTitle() );
+                       $file->load( File::READ_LATEST );
                        wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
                        if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
                                $archiveName = $file->getTimestamp() . '!' . $file->getName();
@@ -1692,7 +1747,7 @@ class WikiRevision {
 
                // @todo FIXME!
                $src = $this->getSrc();
-               $data = Http::get( $src );
+               $data = Http::get( $src, array(), __METHOD__ );
                if ( !$data ) {
                        wfDebug( "IMPORT: couldn't fetch source $src\n" );
                        fclose( $f );
@@ -1849,7 +1904,7 @@ class ImportStreamSource implements ImportSource {
                # quicker and sorts out user-agent problems which might
                # otherwise prevent importing from large sites, such
                # as the Wikimedia cluster, etc.
-               $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
+               $data = Http::request( $method, $url, array( 'followRedirects' => true ), __METHOD__ );
                if ( $data !== false ) {
                        $file = tmpfile();
                        fwrite( $file, $data );