From: Aaron Schulz Date: Fri, 13 Dec 2013 00:54:51 +0000 (-0800) Subject: Added a page_links_updated column for job de-duplication X-Git-Tag: 1.31.0-rc.0~17523 X-Git-Url: https://git.heureux-cyclage.org/?a=commitdiff_plain;h=b57e4570913cbe385fe515f3a0adb8a3c7de55cf;p=lhc%2Fweb%2Fwiklou.git Added a page_links_updated column for job de-duplication Change-Id: I74b6f507ef7371db92e0c3f058d38c0ca5dea9ef --- diff --git a/RELEASE-NOTES-1.23 b/RELEASE-NOTES-1.23 index 5ece7f1cc4..db80ad3c09 100644 --- a/RELEASE-NOTES-1.23 +++ b/RELEASE-NOTES-1.23 @@ -125,6 +125,7 @@ changes to languages because of Bugzilla reports. 'skins.vector.styles' and 'skins.monobook.styles', respectively, and their definition was changed not to include the common*.css files; the two skins now load the 'skins.common.interface' module instead. +* A page_links_updated field has been added to the page table. == Compatibility == diff --git a/includes/WikiPage.php b/includes/WikiPage.php index 0d987aa4c0..cc65ee8fda 100644 --- a/includes/WikiPage.php +++ b/includes/WikiPage.php @@ -83,6 +83,11 @@ class WikiPage implements Page, IDBAccessObject { */ protected $mTouched = '19700101000000'; + /** + * @var string + */ + protected $mLinksUpdated = '19700101000000'; + /** * @var int|null */ @@ -241,6 +246,7 @@ class WikiPage implements Page, IDBAccessObject { $this->mRedirectTarget = null; // Title object if set $this->mLastRevision = null; // Latest revision $this->mTouched = '19700101000000'; + $this->mLinksUpdated = '19700101000000'; $this->mTimestamp = ''; $this->mIsRedirect = false; $this->mLatest = false; @@ -278,6 +284,7 @@ class WikiPage implements Page, IDBAccessObject { 'page_is_new', 'page_random', 'page_touched', + 'page_links_updated', 'page_latest', 'page_len', ); @@ -405,6 +412,7 @@ class WikiPage implements Page, IDBAccessObject { $this->mId = intval( $data->page_id ); $this->mCounter = intval( $data->page_counter ); $this->mTouched = wfTimestamp( TS_MW, $data->page_touched ); + $this->mLinksUpdated = wfTimestampOrNull( TS_MW, $data->page_links_updated ); $this->mIsRedirect = intval( $data->page_is_redirect ); $this->mLatest = intval( $data->page_latest ); // Bug 37225: $latest may no longer match the cached latest Revision object. @@ -533,6 +541,17 @@ class WikiPage implements Page, IDBAccessObject { return $this->mTouched; } + /** + * Get the page_links_updated field + * @return string|null containing GMT timestamp + */ + public function getLinksTimestamp() { + if ( !$this->mDataLoaded ) { + $this->loadPageData(); + } + return $this->mLinksUpdated; + } + /** * Get the page_latest field * @return integer rev_id of current revision diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 9cd7708eff..d5ed250af3 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -218,6 +218,9 @@ class LinksUpdate extends SqlDataUpdate { $changed = $propertiesDeletes + array_diff_assoc( $this->mProperties, $existing ); $this->invalidateProperties( $changed ); + # Update the links table freshness for this title + $this->updateLinksTimestamp(); + # Refresh links of all pages including this page # This will be in a separate transaction if ( $this->mRecursive ) { @@ -855,6 +858,19 @@ class LinksUpdate extends SqlDataUpdate { return $result; } + + /** + * Update links table freshness + */ + protected function updateLinksTimestamp() { + if ( $this->mId ) { + $this->mDb->update( 'page', + array( 'page_links_updated' => $this->mDb->timestamp() ), + array( 'page_id' => $this->mId ), + __METHOD__ + ); + } + } } /** diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index 773afca7db..b3ea964746 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -248,6 +248,7 @@ class MysqlUpdater extends DatabaseUpdater { array( 'addField', 'recentchanges', 'rc_source', 'patch-rc_source.sql' ), array( 'addIndex', 'logging', 'log_user_text_type_time', 'patch-logging_user_text_type_time_index.sql' ), array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ), + array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ), ); } diff --git a/includes/installer/PostgresUpdater.php b/includes/installer/PostgresUpdater.php index dac1400945..fa58a62a26 100644 --- a/includes/installer/PostgresUpdater.php +++ b/includes/installer/PostgresUpdater.php @@ -402,6 +402,7 @@ class PostgresUpdater extends DatabaseUpdater { // 1.23 array( 'addPgField', 'recentchanges', 'rc_source', "TEXT NOT NULL DEFAULT ''" ), + array( 'addPgField', 'page', 'page_links_updated', "TIMESTAMPTZ NULL" ), ); } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 8f117dfe30..3db3758cba 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -125,6 +125,7 @@ class SqliteUpdater extends DatabaseUpdater { array( 'addField', 'recentchanges', 'rc_source', 'patch-rc_source.sql' ), array( 'addIndex', 'logging', 'log_user_text_type_time', 'patch-logging_user_text_type_time_index.sql' ), array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ), + array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ), ); } diff --git a/includes/job/jobs/RefreshLinksJob.php b/includes/job/jobs/RefreshLinksJob.php index bdf0fdf07c..78ac84d76b 100644 --- a/includes/job/jobs/RefreshLinksJob.php +++ b/includes/job/jobs/RefreshLinksJob.php @@ -133,6 +133,10 @@ class RefreshLinksJob extends Job { if ( isset( $this->params['rootJobTimestamp'] ) ) { $page = WikiPage::factory( $title ); $skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5; + if ( $page->getLinksTimestamp() > wfTimestamp( TS_MW, $skewedTimestamp ) ) { + // Something already updated the backlinks since this job was made + return true; + } if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) { $parserOptions = $page->makeParserOptions( 'canonical' ); $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions ); diff --git a/maintenance/archives/patch-page_links_updated.sql b/maintenance/archives/patch-page_links_updated.sql new file mode 100644 index 0000000000..18d9e2d9cd --- /dev/null +++ b/maintenance/archives/patch-page_links_updated.sql @@ -0,0 +1,2 @@ +ALTER TABLE /*$wgDBprefix*/page + ADD page_links_updated varbinary(14) NULL default NULL; diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql index b8a71850e1..3940ba2a6c 100644 --- a/maintenance/postgres/tables.sql +++ b/maintenance/postgres/tables.sql @@ -80,6 +80,7 @@ CREATE TABLE page ( page_is_new SMALLINT NOT NULL DEFAULT 0, page_random NUMERIC(15,14) NOT NULL DEFAULT RANDOM(), page_touched TIMESTAMPTZ, + page_links_updated TIMESTAMPTZ NULL, page_latest INTEGER NOT NULL, -- FK? page_len INTEGER NOT NULL, page_content_model TEXT diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 9a93c44d47..fb2f0e1a93 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -256,6 +256,11 @@ CREATE TABLE /*_*/page ( -- of contained templates. page_touched binary(14) NOT NULL default '', + -- This timestamp is updated whenever a page is re-parsed and + -- it has all the link tracking tables updated for it. This is + -- useful for de-duplicating expensive backlink update jobs. + page_links_updated varbinary(14) NULL default NULL, + -- Handy key to revision.rev_id of the current revision. -- This may be 0 during page creation, but that shouldn't -- happen outside of a transaction... hopefully.