From 341dfa2587220c8e9dff5866036b3092ceb682c4 Mon Sep 17 00:00:00 2001 From: "This, that and the other" Date: Wed, 4 Feb 2015 18:00:36 +1100 Subject: [PATCH] Cache countable statistics to prevent multiple counting on import At the moment, when $wgArticleCountMethod = 'link' (as it is on the WMF cluster), we are querying the Slave database before each individual revision is imported, in order to find out whether the page is countable at that time. This is not sensible, as (1) the slave lags behind the master, but (2) even the master may not be up to date, since page link updates take place through the job queue. This change sets up a cache to hold countable values for pages where import activity has already occurred. That way, we aren't hitting the DB on every revision, only to get an incorrect response back. Bug: T42009 Change-Id: I99189c82672d7790cda5036b6aa9883ce6e566b0 --- includes/Import.php | 40 ++++++++++++++++++++++++++++++++++++-- includes/page/WikiPage.php | 10 +++++++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/includes/Import.php b/includes/Import.php index c3caeccffa..a2d4e83ffd 100644 --- a/includes/Import.php +++ b/includes/Import.php @@ -42,6 +42,8 @@ class WikiImporter { private $config; /** @var ImportTitleFactory */ private $importTitleFactory; + /** @var array */ + private $countableCache = array(); /** * Creates an ImportXMLReader drawing from the source provided @@ -67,6 +69,7 @@ class WikiImporter { } // Default callbacks + $this->setPageCallback( array( $this, 'beforeImportPage' ) ); $this->setRevisionCallback( array( $this, "importRevision" ) ); $this->setUploadCallback( array( $this, 'importUpload' ) ); $this->setLogItemCallback( array( $this, 'importLogItem' ) ); @@ -288,6 +291,19 @@ class WikiImporter { $this->mImportUploads = $import; } + /** + * Default per-page callback. Sets up some things related to site statistics + * @param array $titleAndForeignTitle Two-element array, with Title object at + * index 0 and ForeignTitle object at index 1 + * @return bool + */ + public function beforeImportPage( $titleAndForeignTitle ) { + $title = $titleAndForeignTitle[0]; + $page = WikiPage::factory( $title ); + $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable(); + return true; + } + /** * Default per-revision callback, performs the import. * @param WikiRevision $revision @@ -349,6 +365,26 @@ class WikiImporter { */ public function finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo ) { + + // Update article count statistics (T42009) + // The normal counting logic in WikiPage->doEditUpdates() is designed for + // one-revision-at-a-time editing, not bulk imports. In this situation it + // suffers from issues of slave lag. We let WikiPage handle the total page + // and revision count, and we implement our own custom logic for the + // article (content page) count. + $page = WikiPage::factory( $title ); + $page->loadPageData( 'fromdbmaster' ); + $content = $page->getContent(); + $editInfo = $page->prepareContentForEdit( $content ); + + $countable = $page->isCountable( $editInfo ); + $oldcountable = $this->countableCache['title_' . $title->getPrefixedText()]; + if ( isset( $oldcountable ) && $countable != $oldcountable ) { + DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array( + 'articles' => ( (int)$countable - (int)$oldcountable ) + ) ) ); + } + $args = func_get_args(); return Hooks::run( 'AfterImportPage', $args ); } @@ -1532,7 +1568,6 @@ class WikiRevision { $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); return false; } - $oldcountable = $page->isCountable(); } # @todo FIXME: Use original rev_id optionally (better for backups) @@ -1555,10 +1590,11 @@ class WikiRevision { if ( $changed !== false && !$this->mNoUpdates ) { wfDebug( __METHOD__ . ": running updates\n" ); + // countable/oldcountable stuff is handled in WikiImporter::finishImportPage $page->doEditUpdates( $revision, $userObj, - array( 'created' => $created, 'oldcountable' => $oldcountable ) + array( 'created' => $created, 'oldcountable' => 'no-change' ) ); } diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php index dcebe54216..9e071af54d 100644 --- a/includes/page/WikiPage.php +++ b/includes/page/WikiPage.php @@ -2158,10 +2158,12 @@ class WikiPage implements Page, IDBAccessObject { * - changed: boolean, whether the revision changed the content (default true) * - created: boolean, whether the revision created the page (default false) * - moved: boolean, whether the page was moved (default false) - * - oldcountable: boolean or null (default null): + * - oldcountable: boolean, null, or string 'no-change' (default null): * - boolean: whether the page was counted as an article before that * revision, only used in changed is true and created is false - * - null: don't change the article count + * - null: if created is false, don't update the article count; if created + * is true, do update the article count + * - 'no-change': don't update the article count, ever */ public function doEditUpdates( Revision $revision, User $user, array $options = array() ) { global $wgEnableParserCache; @@ -2222,7 +2224,9 @@ class WikiPage implements Page, IDBAccessObject { $title = $this->mTitle->getPrefixedDBkey(); $shortTitle = $this->mTitle->getDBkey(); - if ( !$options['changed'] && !$options['moved'] ) { + if ( $options['oldcountable'] === 'no-change' || + ( !$options['changed'] && !$options['moved'] ) + ) { $good = 0; } elseif ( $options['created'] ) { $good = (int)$this->isCountable( $editInfo ); -- 2.20.1