Added DeleteLinksJob to support purging backlinks via job runners
author Aaron Schulz <aschulz@wikimedia.org>
Tue, 22 Sep 2015 19:07:05 +0000 (12:07 -0700)
committer Aaron Schulz <aschulz@wikimedia.org>
Wed, 30 Sep 2015 00:40:19 +0000 (17:40 -0700)
* This job should only be constructed via the relevant Content object,
  e.g. the result of enqueueUpdate() being called on a DataUpdate
  returned by Content::getSecondaryUpdates().
* Also modified LinksDeletionUpdate to support a $pageId parameter.
* LinksDeletionUpdate can now be enqueued to a DeleteLinksJob.

Change-Id: I650dcf0bd172ede0d61357ec158a4704ae1f2033

autoload.php
includes/DefaultSettings.php
includes/deferred/LinksDeletionUpdate.php
includes/jobqueue/jobs/DeleteLinksJob.php [new file with mode: 0644]
includes/page/WikiPage.php
tests/phpunit/includes/page/WikiPageTest.php

index f1b0a6c..7f0ef42 100644 (file)
@@ -314,6 +314,7 @@ $wgAutoloadLocalClasses = array(
        'DeleteDefaultMessages' => __DIR__ . '/maintenance/deleteDefaultMessages.php',
        'DeleteEqualMessages' => __DIR__ . '/maintenance/deleteEqualMessages.php',
        'DeleteFileOp' => __DIR__ . '/includes/filebackend/FileOp.php',
+       'DeleteLinksJob' => __DIR__ . '/includes/jobqueue/jobs/DeleteLinksJob.php',
        'DeleteLogFormatter' => __DIR__ . '/includes/logging/DeleteLogFormatter.php',
        'DeleteOldRevisions' => __DIR__ . '/maintenance/deleteOldRevisions.php',
        'DeleteOrphanedRevisions' => __DIR__ . '/maintenance/deleteOrphanedRevisions.php',
index 6b5155a..3088226 100644 (file)
@@ -6672,6 +6672,7 @@ $wgHooks = array();
  */
 $wgJobClasses = array(
        'refreshLinks' => 'RefreshLinksJob',
+       'deleteLinks' => 'DeleteLinksJob',
        'htmlCacheUpdate' => 'HTMLCacheUpdateJob',
        'sendMail' => 'EmaillingJob',
        'enotifNotify' => 'EnotifNotifyJob',
index b7cc70e..d784840 100644 (file)
  *
  * @file
  */
-
 /**
  * Update object handling the cleanup of links tables after a page was deleted.
  **/
-class LinksDeletionUpdate extends SqlDataUpdate {
-       /** @var WikiPage The WikiPage that was deleted */
-       protected $mPage;
+class LinksDeletionUpdate extends SqlDataUpdate implements EnqueueableDataUpdate {
+       /** @var WikiPage */
+       protected $page;
+       /** @var integer */
+       protected $pageId;
 
        /**
-        * Constructor
-        *
         * @param WikiPage $page Page we are updating
+        * @param integer|null $pageId ID of the page we are updating [optional]
         * @throws MWException
         */
-       function __construct( WikiPage $page ) {
+       function __construct( WikiPage $page, $pageId = null ) {
                parent::__construct( false ); // no implicit transaction
 
-               $this->mPage = $page;
-
-               if ( !$page->exists() ) {
+               $this->page = $page;
+               if ( $page->exists() ) {
+                       $this->pageId = $page->getId();
+               } elseif ( $pageId ) {
+                       $this->pageId = $pageId;
+               } else {
                        throw new MWException( "Page ID not known, perhaps the page doesn't exist?" );
                }
        }
 
-       /**
-        * Do some database updates after deletion
-        */
        public function doUpdate() {
-               $title = $this->mPage->getTitle();
-               $id = $this->mPage->getId();
+               # Page may already be deleted, so don't just getId()
+               $id = $this->pageId;
 
                # Delete restrictions for it
                $this->mDb->delete( 'page_restrictions', array( 'pr_page' => $id ), __METHOD__ );
 
                # Fix category table counts
-               $cats = array();
-               $res = $this->mDb->select( 'categorylinks', 'cl_to', array( 'cl_from' => $id ), __METHOD__ );
-
-               foreach ( $res as $row ) {
-                       $cats[] = $row->cl_to;
-               }
-
-               $this->mPage->updateCategoryCounts( array(), $cats );
+               $cats = $this->mDb->selectFieldValues(
+                       'categorylinks',
+                       'cl_to',
+                       array( 'cl_from' => $id ),
+                       __METHOD__
+               );
+               $this->page->updateCategoryCounts( array(), $cats );
 
                # If using cascading deletes, we can skip some explicit deletes
                if ( !$this->mDb->cascadingDeletes() ) {
@@ -79,6 +78,7 @@ class LinksDeletionUpdate extends SqlDataUpdate {
 
                # If using cleanup triggers, we can skip some manual deletes
                if ( !$this->mDb->cleanupTriggers() ) {
+                       $title = $this->page->getTitle();
                        # Find recentchanges entries to clean up...
                        $rcIdsForTitle = $this->mDb->selectFieldValues( 'recentchanges',
                                'rc_id',
@@ -102,4 +102,16 @@ class LinksDeletionUpdate extends SqlDataUpdate {
                        }
                }
        }
+
+       /**
+        * Describe this update as a 'deleteLinks' job so that it can be enqueued
+        * instead of being run directly.
+        *
+        * @return array Map with 'wiki' (wiki ID reported by the DB handle) and
+        *   'job' (the JobSpecification for the 'deleteLinks' job)
+        */
+       public function getAsJobSpecification() {
+               return array(
+                       'wiki' => $this->mDb->getWikiID(),
+                       'job'  => new JobSpecification(
+                               'deleteLinks',
+                               // Use the ID stored by the constructor: the page may already be
+                               // deleted, so asking the page object for its ID is not reliable
+                               array( 'pageId' => $this->pageId ),
+                               array( 'removeDuplicates' => true ),
+                               $this->page->getTitle()
+                       )
+               );
+       }
 }
diff --git a/includes/jobqueue/jobs/DeleteLinksJob.php b/includes/jobqueue/jobs/DeleteLinksJob.php
new file mode 100644 (file)
index 0000000..b24109b
--- /dev/null
@@ -0,0 +1,57 @@
+<?php
+/**
+ * Job to prune link tables for pages that were deleted
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup JobQueue
+ */
+
+/**
+ * Job to prune link tables for pages that were deleted
+ *
+ * Only DataUpdate classes should construct these jobs
+ *
+ * @ingroup JobQueue
+ * @since 1.26
+ */
+class DeleteLinksJob extends Job {
+       /**
+        * @param Title $title Title the page had when it was deleted
+        * @param array $params Job parameters; run() reads the 'pageId' entry
+        */
+       public function __construct( Title $title, array $params ) {
+               parent::__construct( 'deleteLinks', $title, $params );
+               // Flag this job type for duplicate removal
+               $this->removeDuplicates = true;
+       }
+
+       /**
+        * Purge the link table rows left behind by the deleted page.
+        *
+        * @return bool False (with the last error set) if the title or page ID is
+        *   unusable or a page with that ID still exists; true after the update ran
+        */
+       public function run() {
+               if ( $this->title === null ) {
+                       $this->setLastError( "deleteLinks: Invalid title" );
+                       return false;
+               }
+
+               // Without an ID, LinksDeletionUpdate would fall back to whatever page
+               // currently lives at the title, so refuse to continue instead
+               if ( empty( $this->params['pageId'] ) ) {
+                       $this->setLastError( "deleteLinks: Missing page ID" );
+                       return false;
+               }
+
+               $pageId = $this->params['pageId'];
+               if ( WikiPage::newFromID( $pageId, WikiPage::READ_LATEST ) ) {
+                       // The page was restored somehow or something went wrong
+                       $this->setLastError( "deleteLinks: Page #$pageId exists" );
+                       return false;
+               }
+
+               $page = WikiPage::factory( $this->title ); // title when deleted
+               $update = new LinksDeletionUpdate( $page, $pageId );
+               DataUpdate::runUpdates( array( $update ) );
+
+               return true;
+       }
+}
index 2fde832..708a875 100644 (file)
@@ -2931,12 +2931,15 @@ class WikiPage implements Page, IDBAccessObject {
         *   may already return null when the page proper was deleted.
         */
        public function doDeleteUpdates( $id, Content $content = null ) {
-               // update site status
+               // Update site status
                DeferredUpdates::addUpdate( new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(), -1 ) );
 
-               // remove secondary indexes, etc
+               // Delete pagelinks, update secondary indexes, etc
                $updates = $this->getDeletionUpdates( $content );
-               DataUpdate::runUpdates( $updates, 'enqueue' );
+               // Make sure any enqueued jobs run after commit so they see the deletion
+               wfGetDB( DB_MASTER )->onTransactionIdle( function() use ( $updates ) {
+                       DataUpdate::runUpdates( $updates, 'enqueue' );
+               } );
 
                // Reparse any pages transcluding this page
                LinksUpdate::queueRecursiveJobsForTable( $this->mTitle, 'templatelinks' );
index ec08ef4..a21fc8a 100644 (file)
@@ -292,6 +292,12 @@ class WikiPageTest extends MediaWikiLangTestCase {
                        "Title::exists should return false after page was deleted"
                );
 
+               // Run the job queue
+               JobQueueGroup::destroySingletons();
+               $jobs = new RunJobs;
+               $jobs->loadParamsAndArgs( null, array( 'quiet' => true ), null );
+               $jobs->execute();
+
                # ------------------------
                $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->select( 'pagelinks', '*', array( 'pl_from' => $id ) );
@@ -312,8 +318,16 @@ class WikiPageTest extends MediaWikiLangTestCase {
                );
                $id = $page->getId();
 
+               // Similar to MovePage logic
+               wfGetDB( DB_MASTER )->delete( 'page', array( 'page_id' => $id ), __METHOD__ );
                $page->doDeleteUpdates( $id );
 
+               // Run the job queue
+               JobQueueGroup::destroySingletons();
+               $jobs = new RunJobs;
+               $jobs->loadParamsAndArgs( null, array( 'quiet' => true ), null );
+               $jobs->execute();
+
                # ------------------------
                $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->select( 'pagelinks', '*', array( 'pl_from' => $id ) );