Merge "Removed old HTMLCacheUpdateJob b/c code"
[lhc/web/wiklou.git] / includes / Import.php
index c3caecc..c036fbe 100644 (file)
@@ -42,13 +42,15 @@ class WikiImporter {
        private $config;
        /** @var ImportTitleFactory */
        private $importTitleFactory;
+       /** @var array */
+       private $countableCache = array();
 
        /**
         * Creates an ImportXMLReader drawing from the source provided
-        * @param ImportStreamSource $source
+        * @param ImportSource $source
         * @param Config $config
         */
-       function __construct( ImportStreamSource $source, Config $config = null ) {
+       function __construct( ImportSource $source, Config $config = null ) {
                $this->reader = new XMLReader();
                if ( !$config ) {
                        wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
@@ -67,6 +69,7 @@ class WikiImporter {
                }
 
                // Default callbacks
+               $this->setPageCallback( array( $this, 'beforeImportPage' ) );
                $this->setRevisionCallback( array( $this, "importRevision" ) );
                $this->setUploadCallback( array( $this, 'importUpload' ) );
                $this->setLogItemCallback( array( $this, 'importLogItem' ) );
@@ -288,6 +291,19 @@ class WikiImporter {
                $this->mImportUploads = $import;
        }
 
+       /**
+        * Default per-page callback: caches whether the page is currently countable
+        * @param array $titleAndForeignTitle array( Title object, ForeignTitle object )
+        * @return bool
+        */
+       public function beforeImportPage( $titleAndForeignTitle ) {
+               $title = $titleAndForeignTitle[0];
+               $cacheKey = 'title_' . $title->getPrefixedText();
+               // Remember the pre-import state; finishImportPage() compares against it
+               $this->countableCache[$cacheKey] = WikiPage::factory( $title )->isCountable();
+               return true;
+       }
+
        /**
         * Default per-revision callback, performs the import.
         * @param WikiRevision $revision
@@ -349,6 +365,26 @@ class WikiImporter {
         */
        public function finishImportPage( $title, $foreignTitle, $revCount,
                        $sRevCount, $pageInfo ) {
+
+               // Update article count statistics (T42009)
+               // The normal counting logic in WikiPage->doEditUpdates() is designed for
+               // one-revision-at-a-time editing, not bulk imports. In this situation it
+               // suffers from issues of slave lag. We let WikiPage handle the total page
+               // and revision count, and we implement our own custom logic for the
+               // article (content page) count.
+               $page = WikiPage::factory( $title );
+               $page->loadPageData( 'fromdbmaster' );
+               $content = $page->getContent();
+               $editInfo = $page->prepareContentForEdit( $content );
+               $countKey = 'title_' . $title->getPrefixedText();
+               $countable = $page->isCountable( $editInfo );
+               if ( array_key_exists( $countKey, $this->countableCache ) &&
+                       $countable != $this->countableCache[ $countKey ] ) {
+                       DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
+                               'articles' => ( (int)$countable - (int)$this->countableCache[ $countKey ] )
+                       ) ) );
+               }
+
                $args = func_get_args();
                return Hooks::run( 'AfterImportPage', $args );
        }
@@ -441,7 +477,8 @@ class WikiImporter {
        /**
         * Retrieves the contents of the named attribute of the current element.
         * @param string $attr The name of the attribute
-        * @return string The value of the attribute or an empty string if it is not set in the current element.
+        * @return string The value of the attribute or an empty string if it is not set in the current
+        * element.
         */
        public function nodeAttribute( $attr ) {
                return $this->reader->getAttribute( $attr );
@@ -497,36 +534,48 @@ class WikiImporter {
 
                $keepReading = $this->reader->read();
                $skip = false;
-               while ( $keepReading ) {
-                       $tag = $this->reader->name;
-                       $type = $this->reader->nodeType;
-
-                       if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
-                               // Do nothing
-                       } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
-                               break;
-                       } elseif ( $tag == 'siteinfo' ) {
-                               $this->handleSiteInfo();
-                       } elseif ( $tag == 'page' ) {
-                               $this->handlePage();
-                       } elseif ( $tag == 'logitem' ) {
-                               $this->handleLogItem();
-                       } elseif ( $tag != '#text' ) {
-                               $this->warn( "Unhandled top-level XML tag $tag" );
-
-                               $skip = true;
-                       }
+               $rethrow = null;
+               try {
+                       while ( $keepReading ) {
+                               $tag = $this->reader->name;
+                               $type = $this->reader->nodeType;
+
+                               if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+                                       // Do nothing
+                               } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
+                                       break;
+                               } elseif ( $tag == 'siteinfo' ) {
+                                       $this->handleSiteInfo();
+                               } elseif ( $tag == 'page' ) {
+                                       $this->handlePage();
+                               } elseif ( $tag == 'logitem' ) {
+                                       $this->handleLogItem();
+                               } elseif ( $tag != '#text' ) {
+                                       $this->warn( "Unhandled top-level XML tag $tag" );
+
+                                       $skip = true;
+                               }
 
-                       if ( $skip ) {
-                               $keepReading = $this->reader->next();
-                               $skip = false;
-                               $this->debug( "Skip" );
-                       } else {
-                               $keepReading = $this->reader->read();
+                               if ( $skip ) {
+                                       $keepReading = $this->reader->next();
+                                       $skip = false;
+                                       $this->debug( "Skip" );
+                               } else {
+                                       $keepReading = $this->reader->read();
+                               }
                        }
+               } catch ( Exception $ex ) {
+                       $rethrow = $ex;
                }
 
+               // finally
                libxml_disable_entity_loader( $oldDisable );
+               $this->reader->close();
+
+               if ( $rethrow ) {
+                       throw $rethrow;
+               }
+
                return true;
        }
 
@@ -967,10 +1016,10 @@ class UploadSourceAdapter {
        private $mPosition;
 
        /**
-        * @param ImportStreamSource $source
+        * @param ImportSource $source
         * @return string
         */
-       static function registerSource( ImportStreamSource $source ) {
+       static function registerSource( ImportSource $source ) {
                $id = wfRandomString();
 
                self::$sourceRegistrations[$id] = $source;
@@ -1532,7 +1581,6 @@ class WikiRevision {
                                        $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                                return false;
                        }
-                       $oldcountable = $page->isCountable();
                }
 
                # @todo FIXME: Use original rev_id optionally (better for backups)
@@ -1555,10 +1603,11 @@ class WikiRevision {
 
                if ( $changed !== false && !$this->mNoUpdates ) {
                        wfDebug( __METHOD__ . ": running updates\n" );
+                       // Article count is adjusted in WikiImporter::finishImportPage(), not here
                        $page->doEditUpdates(
                                $revision,
                                $userObj,
-                               array( 'created' => $created, 'oldcountable' => $oldcountable )
+                               array( 'created' => $created, 'oldcountable' => 'no-change' )
                        );
                }
 
@@ -1670,7 +1719,7 @@ class WikiRevision {
                        wfDebug( __METHOD__ . ": Successful\n" );
                        return true;
                } else {
-                       wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
+                       wfDebug( __METHOD__ . ': failed: ' . $status->getHTML() . "\n" );
                        return false;
                }
        }
@@ -1708,6 +1757,30 @@ class WikiRevision {
 
 }
 
+/**
+ * Source interface for XML import, as accepted by WikiImporter::__construct()
+ * and UploadSourceAdapter::registerSource().
+ */
+interface ImportSource {
+
+       /**
+        * Indicates whether the end of the input has been reached.
+        * Implementations must return true after a finite number of readChunk() calls.
+        *
+        * @return bool true if there is no more input, false otherwise.
+        */
+       function atEnd();
+
+       /**
+        * Return the next chunk of the input.
+        * Consistent with atEnd(): a string while atEnd() returns false, and
+        * false once the end of input is reached (atEnd() returns true).
+        *
+        * @return bool|string A (possibly empty) string, or false at end of input.
+        */
+       function readChunk();
+}
+
 /**
  * Used for importing XML dumps where the content of the dump is in a string.
  * This class is ineffecient, and should only be used for small dumps.
@@ -1715,7 +1788,7 @@ class WikiRevision {
  *
  * @ingroup SpecialPage
  */
-class ImportStringSource {
+class ImportStringSource implements ImportSource {
        function __construct( $string ) {
                $this->mString = $string;
                $this->mRead = false;
@@ -1744,7 +1817,7 @@ class ImportStringSource {
  * Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
  * @ingroup SpecialPage
  */
-class ImportStreamSource {
+class ImportStreamSource implements ImportSource {
        function __construct( $handle ) {
                $this->mHandle = $handle;
        }