Added a page_links_updated column for job de-duplication
author: Aaron Schulz <aschulz@wikimedia.org>
Fri, 13 Dec 2013 00:54:51 +0000 (16:54 -0800)
committer: Springle <springle@wikimedia.org>
Tue, 24 Dec 2013 10:54:32 +0000 (10:54 +0000)
Change-Id: I74b6f507ef7371db92e0c3f058d38c0ca5dea9ef

RELEASE-NOTES-1.23
includes/WikiPage.php
includes/deferred/LinksUpdate.php
includes/installer/MysqlUpdater.php
includes/installer/PostgresUpdater.php
includes/installer/SqliteUpdater.php
includes/job/jobs/RefreshLinksJob.php
maintenance/archives/patch-page_links_updated.sql [new file with mode: 0644]
maintenance/postgres/tables.sql
maintenance/tables.sql

index 5ece7f1..db80ad3 100644 (file)
@@ -125,6 +125,7 @@ changes to languages because of Bugzilla reports.
   'skins.vector.styles' and 'skins.monobook.styles', respectively,
   and their definition was changed not to include the common*.css files;
   the two skins now load the 'skins.common.interface' module instead.
+* A page_links_updated field has been added to the page table.
 
 == Compatibility ==
 
index 0d987aa..cc65ee8 100644 (file)
@@ -83,6 +83,11 @@ class WikiPage implements Page, IDBAccessObject {
         */
        protected $mTouched = '19700101000000';
 
+       /**
+        * @var string|null page_links_updated as a TS_MW timestamp; presumably null if the field is NULL
+        */
+       protected $mLinksUpdated = '19700101000000';
+
        /**
         * @var int|null
         */
@@ -241,6 +246,7 @@ class WikiPage implements Page, IDBAccessObject {
                $this->mRedirectTarget = null; // Title object if set
                $this->mLastRevision = null; // Latest revision
                $this->mTouched = '19700101000000';
+               $this->mLinksUpdated = '19700101000000';
                $this->mTimestamp = '';
                $this->mIsRedirect = false;
                $this->mLatest = false;
@@ -278,6 +284,7 @@ class WikiPage implements Page, IDBAccessObject {
                        'page_is_new',
                        'page_random',
                        'page_touched',
+                       'page_links_updated',
                        'page_latest',
                        'page_len',
                );
@@ -405,6 +412,7 @@ class WikiPage implements Page, IDBAccessObject {
                        $this->mId = intval( $data->page_id );
                        $this->mCounter = intval( $data->page_counter );
                        $this->mTouched = wfTimestamp( TS_MW, $data->page_touched );
+                       $this->mLinksUpdated = wfTimestampOrNull( TS_MW, $data->page_links_updated );
                        $this->mIsRedirect = intval( $data->page_is_redirect );
                        $this->mLatest = intval( $data->page_latest );
                        // Bug 37225: $latest may no longer match the cached latest Revision object.
@@ -533,6 +541,17 @@ class WikiPage implements Page, IDBAccessObject {
                return $this->mTouched;
        }
 
+       /**
+        * Get the page_links_updated field, loading the page data first if not yet loaded
+        * @return string|null GMT timestamp in TS_MW format; presumably null if the field is NULL
+        */
+       public function getLinksTimestamp() {
+               if ( !$this->mDataLoaded ) {
+                       $this->loadPageData();
+               }
+               return $this->mLinksUpdated;
+       }
+
        /**
         * Get the page_latest field
         * @return integer rev_id of current revision
index 9cd7708..d5ed250 100644 (file)
@@ -218,6 +218,9 @@ class LinksUpdate extends SqlDataUpdate {
                $changed = $propertiesDeletes + array_diff_assoc( $this->mProperties, $existing );
                $this->invalidateProperties( $changed );
 
+               # Update the links table freshness for this title
+               $this->updateLinksTimestamp();
+
                # Refresh links of all pages including this page
                # This will be in a separate transaction
                if ( $this->mRecursive ) {
@@ -855,6 +858,19 @@ class LinksUpdate extends SqlDataUpdate {
 
                return $result;
        }
+
+       /**
+        * Update links table freshness: if mId is set, store mDb->timestamp() in page_links_updated
+        */
+       protected function updateLinksTimestamp() {
+               if ( $this->mId ) {
+                       $this->mDb->update( 'page',
+                               array( 'page_links_updated' => $this->mDb->timestamp() ),
+                               array( 'page_id' => $this->mId ),
+                               __METHOD__
+                       );
+               }
+       }
 }
 
 /**
index 773afca..b3ea964 100644 (file)
@@ -248,6 +248,7 @@ class MysqlUpdater extends DatabaseUpdater {
                        array( 'addField', 'recentchanges', 'rc_source', 'patch-rc_source.sql' ),
                        array( 'addIndex', 'logging', 'log_user_text_type_time',  'patch-logging_user_text_type_time_index.sql' ),
                        array( 'addIndex', 'logging', 'log_user_text_time',  'patch-logging_user_text_time_index.sql' ),
+                       array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ),
                );
        }
 
index dac1400..fa58a62 100644 (file)
@@ -402,6 +402,7 @@ class PostgresUpdater extends DatabaseUpdater {
 
                        // 1.23
                        array( 'addPgField', 'recentchanges', 'rc_source', "TEXT NOT NULL DEFAULT ''" ),
+                       array( 'addPgField', 'page', 'page_links_updated', "TIMESTAMPTZ NULL" ),
                );
        }
 
index 8f117df..3db3758 100644 (file)
@@ -125,6 +125,7 @@ class SqliteUpdater extends DatabaseUpdater {
                        array( 'addField', 'recentchanges', 'rc_source', 'patch-rc_source.sql' ),
                        array( 'addIndex', 'logging', 'log_user_text_type_time',  'patch-logging_user_text_type_time_index.sql' ),
                        array( 'addIndex', 'logging', 'log_user_text_time',  'patch-logging_user_text_time_index.sql' ),
+                       array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ),
                );
        }
 
index bdf0fdf..78ac84d 100644 (file)
@@ -133,6 +133,10 @@ class RefreshLinksJob extends Job {
                if ( isset( $this->params['rootJobTimestamp'] ) ) {
                        $page = WikiPage::factory( $title );
                        $skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
+                       if ( $page->getLinksTimestamp() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
+                               // Something already updated the backlinks since this job was made
+                               return true;
+                       }
                        if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
                                $parserOptions = $page->makeParserOptions( 'canonical' );
                                $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
diff --git a/maintenance/archives/patch-page_links_updated.sql b/maintenance/archives/patch-page_links_updated.sql
new file mode 100644 (file)
index 0000000..18d9e2d
--- /dev/null
@@ -0,0 +1,2 @@
+ALTER TABLE /*$wgDBprefix*/page
+  ADD page_links_updated varbinary(14) NULL default NULL;
index b8a7185..3940ba2 100644 (file)
@@ -80,6 +80,7 @@ CREATE TABLE page (
   page_is_new        SMALLINT       NOT NULL  DEFAULT 0,
   page_random        NUMERIC(15,14) NOT NULL  DEFAULT RANDOM(),
   page_touched       TIMESTAMPTZ,
+  page_links_updated TIMESTAMPTZ    NULL,
   page_latest        INTEGER        NOT NULL, -- FK?
   page_len           INTEGER        NOT NULL,
   page_content_model TEXT
index 9a93c44..fb2f0e1 100644 (file)
@@ -256,6 +256,11 @@ CREATE TABLE /*_*/page (
   -- of contained templates.
   page_touched binary(14) NOT NULL default '',
 
+  -- This timestamp is updated whenever a page is re-parsed and all of
+  -- its link tracking tables have been refreshed. It is used to
+  -- de-duplicate expensive backlink update jobs.
+  page_links_updated varbinary(14) NULL default NULL,
+
   -- Handy key to revision.rev_id of the current revision.
   -- This may be 0 during page creation, but that shouldn't
   -- happen outside of a transaction... hopefully.