Merge "tests: Remove unused TableCleanupTest class"
[lhc/web/wiklou.git] / includes / jobqueue / jobs / RefreshLinksJob.php
1 <?php
2 /**
3 * Job to update link tables for pages
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup JobQueue
22 */
23
24 /**
25 * Job to update link tables for pages
26 *
27 * This job comes in a few variants:
28 * - a) Recursive jobs to update links for backlink pages for a given title.
29 * These jobs have (recursive:true,table:<table>) set.
30 * - b) Jobs to update links for a set of pages (the job title is ignored).
31 * These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
32 * - c) Jobs to update links for a single page (the job title)
33 * These jobs need no extra fields set.
34 *
35 * @ingroup JobQueue
36 */
class RefreshLinksJob extends Job {
	/** @var float Cache parser output when it takes this long to render */
	const PARSE_THRESHOLD_SEC = 1.0;
	/** @var int Lag safety margin when comparing root job times to last-refresh times */
	const CLOCK_FUDGE = 10;

	/**
	 * Build a 'refreshLinks' job and decide whether de-duplication is worth attempting.
	 *
	 * @param Title $title Job title (run() ignores it when the 'pages' parameter is set)
	 * @param array $params Job parameters; 'masterPos', 'range' and 'pages' are inspected
	 *   here to decide whether duplicate removal should be enabled
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'refreshLinks', $title, $params );
		// Avoid the overhead of de-duplication when it would be pointless
		$this->removeDuplicates = (
			// Master positions won't match
			!isset( $params['masterPos'] ) &&
			// Ranges rarely will line up
			!isset( $params['range'] ) &&
			// Multiple pages per job make matches unlikely
			!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
		);
	}

	/**
	 * Create a job whose command is 'refreshLinksPrioritized' instead of 'refreshLinks'.
	 *
	 * @param Title $title
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newPrioritized( Title $title, array $params ) {
		$job = new self( $title, $params );
		$job->command = 'refreshLinksPrioritized';

		return $job;
	}

	/**
	 * Create a job whose command is 'refreshLinksDynamic' instead of 'refreshLinks'.
	 *
	 * @param Title $title
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newDynamic( Title $title, array $params ) {
		$job = new self( $title, $params );
		$job->command = 'refreshLinksDynamic';

		return $job;
	}

	/**
	 * Execute the job in one of its three variants (recursive, per-pages, single title).
	 *
	 * NOTE(review): the boolean result of runForTitle() is not propagated; this method
	 * reports success regardless. That is left as-is so retry behaviour does not change.
	 *
	 * @return bool Always true
	 */
	public function run() {
		global $wgUpdateRowsPerJob;

		// Job to update all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Avoid slave lag when fetching templates.
			// When the outermost job is run, we know that the caller that enqueued it must have
			// committed the relevant changes to the DB by now. At that point, record the master
			// position and pass it along as the job recursively breaks into smaller range jobs.
			// Hopefully, when leaf jobs are popped, the slaves will have reached that position.
			if ( isset( $this->params['masterPos'] ) ) {
				$extraParams['masterPos'] = $this->params['masterPos'];
			} elseif ( wfGetLB()->getServerCount() > 1 ) {
				$extraParams['masterPos'] = wfGetLB()->getMasterPos();
			} else {
				$extraParams['masterPos'] = false;
			}
			$extraParams['triggeredRecursive'] = true;
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				1, // job-per-title
				array( 'params' => $extraParams )
			);
			JobQueueGroup::singleton()->push( $jobs );
		// Job to update link tables for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			$this->waitForMasterPosition();
			foreach ( $this->params['pages'] as $nsAndKey ) {
				list( $ns, $dbKey ) = $nsAndKey;
				$title = Title::makeTitleSafe( $ns, $dbKey );
				if ( !$title ) {
					// makeTitleSafe() yields null for an invalid namespace/key pair; passing
					// that to runForTitle( Title ) would raise a type error and abort the
					// remaining pages, so record the problem and move on instead.
					$this->setLastError( "Invalid title ($ns,$dbKey)." );
					continue;
				}
				$this->runForTitle( $title );
			}
		// Job to update link tables for a given title
		} else {
			$this->waitForMasterPosition();
			$this->runForTitle( $this->title );
		}

		return true;
	}

	/**
	 * Wait for the recorded master position, if one was passed and there is more than
	 * one DB server.
	 */
	protected function waitForMasterPosition() {
		if ( !empty( $this->params['masterPos'] ) && wfGetLB()->getServerCount() > 1 ) {
			// Wait for the current/next slave DB handle to catch up to the master.
			// This way, we get the correct page_latest for templates or files that just
			// changed milliseconds ago, having triggered this job to begin with.
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}
	}

	/**
	 * Refresh the link tables for one page, reusing cached parser output when possible.
	 *
	 * @param Title $title
	 * @return bool True if the updates ran or were already done by something else;
	 *   false if no revision was found or page_latest no longer matches the revision
	 */
	protected function runForTitle( Title $title ) {
		$page = WikiPage::factory( $title );
		if ( !empty( $this->params['triggeringRevisionId'] ) ) {
			// Fetch the specified revision; lockAndGetLatest() below detects if the page
			// was edited since and aborts in order to avoid corrupting the link tables
			$revision = Revision::newFromId(
				$this->params['triggeringRevisionId'],
				Revision::READ_LATEST
			);
		} else {
			// Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
			$revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
		}

		if ( !$revision ) {
			$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
			return false; // just deleted?
		}

		$content = $revision->getContent( Revision::RAW );
		if ( !$content ) {
			// If there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		$parserOutput = false;
		$parserOptions = $page->makeParserOptions( 'canonical' );
		// If page_touched changed after this root job, then it is likely that
		// any views of the pages already resulted in re-parses which are now in
		// cache. The cache can be reused to avoid expensive parsing in some cases.
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			$opportunistic = !empty( $this->params['isOpportunistic'] );

			$skewedTimestamp = $this->params['rootJobTimestamp'];
			// For opportunistic updates, neither clock skew nor DB snapshot/slave lag
			// matter much; focus on reusing the (often recently updated) cache.
			if ( !$opportunistic ) {
				// For transclusion updates, the template changes must be reflected
				$skewedTimestamp = wfTimestamp( TS_MW,
					wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
				);
			}

			if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
				// Something already updated the backlinks since this job was made
				return true;
			}

			if ( $page->getTouched() >= $skewedTimestamp || $opportunistic ) {
				// Something bumped page_touched since this job was made
				// or the cache is otherwise suspected to be up-to-date
				$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
				if ( $parserOutput && $parserOutput->getCacheTime() < $skewedTimestamp ) {
					$parserOutput = false; // too stale
				}
			}
		}

		// Fetch the current revision and parse it if necessary...
		if ( !$parserOutput ) {
			$start = microtime( true );
			// Revision ID must be passed to the parser output to get revision variables correct
			$parserOutput = $content->getParserOutput(
				$title, $revision->getId(), $parserOptions, false );
			$elapsed = microtime( true ) - $start;
			// If it took a long time to render, then save this back to the cache to avoid
			// wasted CPU by other apaches or job runners. We don't want to always save to
			// cache as this can cause high cache I/O and LRU churn when a template changes.
			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
				&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
				&& $parserOutput->isCacheable()
			) {
				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
				ParserCache::singleton()->save(
					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
				);
			}
		}

		$updates = $content->getSecondaryDataUpdates(
			$title,
			null,
			!empty( $this->params['useRecursiveLinksUpdate'] ),
			$parserOutput
		);

		$latestNow = $page->lockAndGetLatest();
		if ( !$latestNow || $revision->getId() != $latestNow ) {
			// Do not clobber over newer updates with older ones. If all jobs were FIFO and
			// serialized, it would be OK to update links based on older revisions since it
			// would eventually get to the latest. Since that is not the case (by design),
			// only update the link tables to a state matching the current revision's output.
			$this->setLastError( "page_latest changed from {$revision->getId()} to $latestNow" );
			return false;
		}

		DataUpdate::runUpdates( $updates );

		InfoAction::invalidateCache( $title );

		return true;
	}

	/**
	 * Get the de-duplication info, without the job title for per-pages jobs.
	 *
	 * @return array The parent's info; 'namespace' and 'title' are removed when the job
	 *   parameters contain 'pages'
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		// For per-pages jobs, the job title is that of the template that changed
		// (or similar), so remove that since it ruins duplicate detection.
		// The job parameters sit under 'params', so that is where 'pages' must be looked up;
		// checking $info['pages'] directly never matched.
		if ( is_array( $info['params'] ) && isset( $info['params']['pages'] ) ) {
			unset( $info['namespace'] );
			unset( $info['title'] );
		}

		return $info;
	}

	/**
	 * @return int Number of entries in the 'pages' parameter, or 1 when it is not set
	 */
	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}