86c808bb9fa28c491c0d0160cbd7854b9ae7b351
[lhc/web/wiklou.git] / includes / job / jobs / RefreshLinksJob.php
1 <?php
2 /**
3 * Job to update links for a given title.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup JobQueue
22 */
23
24 /**
25 * Background job to update links for a given title.
26 *
27 * @ingroup JobQueue
28 */
class RefreshLinksJob extends Job {
	/**
	 * Build a 'refreshLinks' job and flag it for duplicate removal.
	 *
	 * @param mixed $title Title the job applies to; forwarded to the parent constructor
	 * @param mixed $params Job parameters; run() reads the optional 'masterPos' key
	 * @param int $id Job id, forwarded to the parent constructor
	 */
	function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'refreshLinks', $title, $params, $id );
		// Re-parsing a page is expensive, so identical queued jobs should be collapsed
		$this->removeDuplicates = true;
	}

	/**
	 * Run a refreshLinks job.
	 *
	 * Clears the link cache, optionally waits for the 'masterPos' position,
	 * loads the revision for the job title and runs its secondary data updates.
	 * On failure $this->error is set and false is returned.
	 *
	 * @return boolean success
	 */
	function run() {
		$fname = __METHOD__;
		wfProfileIn( $fname );

		LinkCache::singleton()->clear();

		$target = $this->title;
		if ( $target === null ) {
			$this->error = "refreshLinks: Invalid title";
			wfProfileOut( $fname );
			return false;
		}

		# If the job carries a master position, wait for the slave DB handle to
		# reach it first. This way, we get the correct page_latest for templates
		# or files that changed just before this job was triggered.
		if ( isset( $this->params['masterPos'] ) ) {
			$loadBalancer = wfGetLB();
			$loadBalancer->waitFor( $this->params['masterPos'] );
		}

		$rev = Revision::newFromTitle( $target, false, Revision::READ_NORMAL );
		if ( $rev ) {
			self::runForTitleInternal( $target, $rev, $fname );
			wfProfileOut( $fname );
			return true;
		}

		$this->error = 'refreshLinks: Article not found "' .
			$target->getPrefixedDBkey() . '"';
		wfProfileOut( $fname );
		return false; // XXX: what if it was just deleted?
	}

	/**
	 * Generate the parser output for a revision and run the resulting
	 * secondary data updates for the title.
	 *
	 * @param Title $title Page whose secondary data is refreshed
	 * @param Revision $revision Revision supplying the content to process
	 * @param string $fname Name handed to wfProfileIn()/wfProfileOut()
	 */
	public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
		wfProfileIn( $fname );

		// When the revision yields no content, fall back to the handler's empty content
		$content = $revision->getContent( Revision::RAW )
			?: $revision->getContentHandler()->makeEmptyContent();

		// The revision ID is passed along so revision variables come out correct
		$revId = $revision->getId();
		$parserOutput = $content->getParserOutput( $title, $revId, null, false );

		DataUpdate::runUpdates(
			$content->getSecondaryDataUpdates( $title, null, false, $parserOutput )
		);

		wfProfileOut( $fname );
	}
}
89
90 /**
91 * Background job to update links for a given title.
92 * Newer version for high use templates.
93 *
94 * @ingroup JobQueue
95 */
class RefreshLinksJob2 extends Job {
	/** Largest number of backlinking pages a single job handles before it is split */
	const MAX_TITLES_RUN = 10;

	/**
	 * Build a 'refreshLinks2' job.
	 *
	 * @param mixed $title Title whose backlinks are refreshed; forwarded to the parent constructor
	 * @param array $params Job parameters; run() requires 'start' and 'end' and
	 *   reads the optional 'table' and 'masterPos' keys
	 * @param int $id Job id, forwarded to the parent constructor
	 */
	function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'refreshLinks2', $title, $params, $id );
	}

	/**
	 * Run a refreshLinks2 job.
	 *
	 * Fetches the backlinks of the job title in the 'start'..'end' range, then:
	 *  - with more than MAX_TITLES_RUN pages, queues smaller refreshLinks2 jobs;
	 *  - otherwise, outside the CLI, queues one refreshLinks job per page;
	 *  - otherwise re-parses each page directly.
	 *
	 * @return boolean success
	 */
	function run() {
		$fname = __METHOD__;
		wfProfileIn( $fname );

		LinkCache::singleton()->clear();

		if ( $this->title === null ) {
			$this->error = "refreshLinks2: Invalid title";
			wfProfileOut( $fname );
			return false;
		}
		if ( !isset( $this->params['start'], $this->params['end'] ) ) {
			$this->error = "refreshLinks2: Invalid params";
			wfProfileOut( $fname );
			return false;
		}

		// Back compat for pre-r94435 jobs, which carry no 'table' parameter
		if ( isset( $this->params['table'] ) ) {
			$table = $this->params['table'];
		} else {
			$table = 'templatelinks';
		}

		// Avoid slave lag when fetching templates: prefer the position stored in
		// the job, else take the current master position when there are several servers
		if ( isset( $this->params['masterPos'] ) ) {
			$masterPos = $this->params['masterPos'];
		} else {
			$loadBalancer = wfGetLB();
			$masterPos = ( $loadBalancer->getServerCount() > 1 )
				? $loadBalancer->getMasterPos()
				: false;
		}

		$backlinks = $this->title->getBacklinkCache()->getLinks(
			$table, $this->params['start'], $this->params['end'] );

		if ( $backlinks->count() > self::MAX_TITLES_RUN ) {
			# We don't want to parse too many pages per job as it can starve other jobs.
			# If there are too many pages to parse, break this up into smaller jobs. Passing
			# the master position along cuts down on the time the runners of those jobs spend
			# waiting for slaves, since time will have passed before they are popped.
			$queued = array();
			$rangeStart = 0; // article ID of the first page in the current batch
			$rangeEnd = 0; // article ID of the latest page in the current batch
			$pending = 0; // pages collected in the current batch so far
			foreach ( $backlinks as $page ) {
				$pageId = $page->getArticleId();
				if ( $pending === 0 ) {
					// An empty batch means this page opens a new range
					$rangeStart = $pageId;
				}
				$rangeEnd = $pageId;
				$pending++;
				if ( $pending >= self::MAX_TITLES_RUN ) {
					$queued[] = new self( $this->title, array(
						'table' => $table,
						'start' => $rangeStart,
						'end' => $rangeEnd,
						'masterPos' => $masterPos
					) );
					$pending = 0;
				}
			}
			if ( $pending > 0 ) {
				// Group the remaining pages into one last job
				$queued[] = new self( $this->title, array(
					'table' => $table,
					'start' => $rangeStart,
					'end' => $rangeEnd,
					'masterPos' => $masterPos
				) );
			}
			Job::batchInsert( $queued );
		} elseif ( php_sapi_name() != 'cli' ) {
			# Not suitable for page load triggered job running!
			# Gracefully switch to refreshLinks jobs if this happens.
			$queued = array();
			$jobParams = array( 'masterPos' => $masterPos );
			foreach ( $backlinks as $page ) {
				$queued[] = new RefreshLinksJob( $page, $jobParams );
			}
			Job::batchInsert( $queued );
		} else {
			# Wait for the DB of the current/next slave DB handle to catch up to the master.
			# This way, we get the correct page_latest for templates or files that just changed
			# milliseconds ago, having triggered this job to begin with.
			if ( $masterPos ) {
				wfGetLB()->waitFor( $masterPos );
			}
			# Re-parse each page that links to this page and update its tracking links...
			foreach ( $backlinks as $page ) {
				$rev = Revision::newFromTitle( $page, false, Revision::READ_NORMAL );
				if ( $rev ) {
					RefreshLinksJob::runForTitleInternal( $page, $rev, $fname );
					wfWaitForSlaves();
				} else {
					// Record the problem and move on to the next page
					$this->error = 'refreshLinks: Article not found "' .
						$page->getPrefixedDBkey() . '"';
				}
			}
		}

		wfProfileOut( $fname );
		return true;
	}
}