Merge "Removed old HTMLCacheUpdateJob b/c code"

[lhc/web/wiklou.git] / includes / Import.php
diff --git a/includes/Import.php b/includes/Import.php

index c3caecc..c036fbe 100644 (file)
--- a/includes/Import.php
+++ b/includes/Import.php
@@ -42,13 +42,15 @@ class WikiImporter {
         private $config;
         /** @var ImportTitleFactory */
         private $importTitleFactory;
+       /** @var array */
+       private $countableCache = array();
  
         /**
          * Creates an ImportXMLReader drawing from the source provided
-        * @param ImportStreamSource $source
+        * @param ImportSource $source
          * @param Config $config
          */
-       function __construct( ImportStreamSource $source, Config $config = null ) {
+       function __construct( ImportSource $source, Config $config = null ) {
                 $this->reader = new XMLReader();
                 if ( !$config ) {
                         wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
@@ -67,6 +69,7 @@ class WikiImporter {
                 }
  
                 // Default callbacks
+               $this->setPageCallback( array( $this, 'beforeImportPage' ) );
                 $this->setRevisionCallback( array( $this, "importRevision" ) );
                 $this->setUploadCallback( array( $this, 'importUpload' ) );
                 $this->setLogItemCallback( array( $this, 'importLogItem' ) );
@@ -288,6 +291,19 @@ class WikiImporter {
                 $this->mImportUploads = $import;
         }
  
+       /**
+        * Default per-page callback. Caches the page's current countable state for site statistics
+        * @param array $titleAndForeignTitle Two-element array, with Title object at
+        * index 0 and ForeignTitle object at index 1
+        * @return bool Always true
+        */
+       public function beforeImportPage( $titleAndForeignTitle ) {
+               $title = $titleAndForeignTitle[0];
+               $cacheKey = 'title_' . $title->getPrefixedText();
+               $this->countableCache[$cacheKey] = WikiPage::factory( $title )->isCountable();
+               return true;
+       }
+
         /**
          * Default per-revision callback, performs the import.
          * @param WikiRevision $revision
@@ -349,6 +365,26 @@ class WikiImporter {
          */
         public function finishImportPage( $title, $foreignTitle, $revCount,
                         $sRevCount, $pageInfo ) {
+
+               // Update article count statistics (T42009)
+               // The normal counting logic in WikiPage->doEditUpdates() is designed for
+               // one-revision-at-a-time editing, not bulk imports. In this situation it
+               // suffers from issues of slave lag. We let WikiPage handle the total page
+               // and revision count, and we implement our own custom logic for the
+               // article (content page) count.
+               $page = WikiPage::factory( $title );
+               $page->loadPageData( 'fromdbmaster' );
+               $content = $page->getContent();
+               $editInfo = $page->prepareContentForEdit( $content );
+               $countKey = 'title_' . $title->getPrefixedText();
+               $countable = $page->isCountable( $editInfo );
+               if ( array_key_exists( $countKey, $this->countableCache ) &&
+                       $countable != $this->countableCache[ $countKey ] ) {
+                       DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
+                               'articles' => ( (int)$countable - (int)$this->countableCache[ $countKey ] )
+                       ) ) );
+               }
+
                 $args = func_get_args();
                 return Hooks::run( 'AfterImportPage', $args );
         }
@@ -441,7 +477,8 @@ class WikiImporter {
         /**
          * Retrieves the contents of the named attribute of the current element.
          * @param string $attr The name of the attribute
-        * @return string The value of the attribute or an empty string if it is not set in the current element.
+        * @return string The value of the attribute or an empty string if it is not set in the current
+        * element.
          */
         public function nodeAttribute( $attr ) {
                 return $this->reader->getAttribute( $attr );
@@ -497,36 +534,48 @@ class WikiImporter {
  
                 $keepReading = $this->reader->read();
                 $skip = false;
-               while ( $keepReading ) {
-                       $tag = $this->reader->name;
-                       $type = $this->reader->nodeType;
-
-                       if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
-                               // Do nothing
-                       } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
-                               break;
-                       } elseif ( $tag == 'siteinfo' ) {
-                               $this->handleSiteInfo();
-                       } elseif ( $tag == 'page' ) {
-                               $this->handlePage();
-                       } elseif ( $tag == 'logitem' ) {
-                               $this->handleLogItem();
-                       } elseif ( $tag != '#text' ) {
-                               $this->warn( "Unhandled top-level XML tag $tag" );
-
-                               $skip = true;
-                       }
+               $rethrow = null;
+               try {
+                       while ( $keepReading ) {
+                               $tag = $this->reader->name;
+                               $type = $this->reader->nodeType;
+
+                               if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+                                       // Do nothing
+                               } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
+                                       break;
+                               } elseif ( $tag == 'siteinfo' ) {
+                                       $this->handleSiteInfo();
+                               } elseif ( $tag == 'page' ) {
+                                       $this->handlePage();
+                               } elseif ( $tag == 'logitem' ) {
+                                       $this->handleLogItem();
+                               } elseif ( $tag != '#text' ) {
+                                       $this->warn( "Unhandled top-level XML tag $tag" );
+
+                                       $skip = true;
+                               }
  
-                       if ( $skip ) {
-                               $keepReading = $this->reader->next();
-                               $skip = false;
-                               $this->debug( "Skip" );
-                       } else {
-                               $keepReading = $this->reader->read();
+                               if ( $skip ) {
+                                       $keepReading = $this->reader->next();
+                                       $skip = false;
+                                       $this->debug( "Skip" );
+                               } else {
+                                       $keepReading = $this->reader->read();
+                               }
                         }
+               } catch ( Exception $ex ) {
+                       $rethrow = $ex;
                 }
  
+               // Emulated "finally": always restore the entity loader setting and close the reader
                 libxml_disable_entity_loader( $oldDisable );
+               $this->reader->close();
+
+               if ( $rethrow ) {
+                       throw $rethrow;
+               }
+
                 return true;
         }
  
@@ -967,10 +1016,10 @@ class UploadSourceAdapter {
         private $mPosition;
  
         /**
-        * @param ImportStreamSource $source
+        * @param ImportSource $source
          * @return string
          */
-       static function registerSource( ImportStreamSource $source ) {
+       static function registerSource( ImportSource $source ) {
                 $id = wfRandomString();
  
                 self::$sourceRegistrations[$id] = $source;
@@ -1532,7 +1581,6 @@ class WikiRevision {
                                         $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                                 return false;
                         }
-                       $oldcountable = $page->isCountable();
                 }
  
                 # @todo FIXME: Use original rev_id optionally (better for backups)
@@ -1555,10 +1603,11 @@ class WikiRevision {
  
                 if ( $changed !== false && !$this->mNoUpdates ) {
                         wfDebug( __METHOD__ . ": running updates\n" );
+                       // countable/oldcountable stuff is handled in WikiImporter::finishImportPage
                         $page->doEditUpdates(
                                 $revision,
                                 $userObj,
-                               array( 'created' => $created, 'oldcountable' => $oldcountable )
+                               array( 'created' => $created, 'oldcountable' => 'no-change' )
                         );
                 }
  
@@ -1670,7 +1719,7 @@ class WikiRevision {
                         wfDebug( __METHOD__ . ": Successful\n" );
                         return true;
                 } else {
-                       wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
+                       wfDebug( __METHOD__ . ': failed: ' . $status->getHTML() . "\n" );
                         return false;
                 }
         }
@@ -1708,6 +1757,30 @@ class WikiRevision {
  
  }
  
+/**
+ * Source interface for XML import, accepted by WikiImporter and UploadSourceAdapter.
+ */
+interface ImportSource {
+
+       /**
+        * Indicates whether the end of the input has been reached.
+        * Will return true after a finite number of calls to readChunk.
+        *
+        * @return bool true if there is no more input, false otherwise.
+        */
+       public function atEnd();
+
+       /**
+        * Return a chunk of the input, as a (possibly empty) string.
+        * When the end of input is reached, readChunk() returns false.
+        * If atEnd() returns false, readChunk() will return a string.
+        * If atEnd() returns true, readChunk() will return false.
+        *
+        * @return bool|string The next chunk, or false once the input is exhausted
+        */
+       public function readChunk();
+}
+
  /**
   * Used for importing XML dumps where the content of the dump is in a string.
  * This class is inefficient, and should only be used for small dumps.
@@ -1715,7 +1788,7 @@ class WikiRevision {
   *
   * @ingroup SpecialPage
   */
-class ImportStringSource {
+class ImportStringSource implements ImportSource {
         function __construct( $string ) {
                 $this->mString = $string;
                 $this->mRead = false;
@@ -1744,7 +1817,7 @@ class ImportStringSource {
  * Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
   * @ingroup SpecialPage
   */
-class ImportStreamSource {
+class ImportStreamSource implements ImportSource {
         function __construct( $handle ) {
                 $this->mHandle = $handle;
         }