[JobQueue] Minor doc cleanup.
[lhc/web/wiklou.git] / includes / job / JobQueue.php
1 <?php
2 /**
3 * Job queue base code.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @defgroup JobQueue JobQueue
22 * @author Aaron Schulz
23 */
24
/**
 * Base class for job queues: handles enqueueing and running of background jobs.
 *
 * The public API consists of final wrapper methods that validate their input,
 * wrap the call in profiling and then delegate to a protected do*() method
 * that concrete queue classes implement or override.
 *
 * @ingroup JobQueue
 * @since 1.21
 */
abstract class JobQueue {
	/** @var string Wiki ID */
	protected $wiki;
	/** @var string Job type */
	protected $type;
	/** @var string Job priority for pop() */
	protected $order;
	/** @var integer Seconds */
	protected $claimTTL;

	const QoS_Atomic = 1; // integer; "all-or-nothing" job insertions

	/**
	 * Store the common queue settings.
	 *
	 * 'wiki' and 'type' are read unconditionally, so callers must supply them.
	 * 'order' falls back to "random" and 'claimTTL' falls back to 0 when unset.
	 *
	 * @param $params array See JobQueue::factory() for the supported keys
	 */
	protected function __construct( array $params ) {
		$this->wiki = $params['wiki'];
		$this->type = $params['type'];
		$this->order = isset( $params['order'] ) ? $params['order'] : 'random';
		$this->claimTTL = isset( $params['claimTTL'] ) ? $params['claimTTL'] : 0;
	}

	/**
	 * Get a job queue object of the specified type.
	 * $params includes:
	 *   - class    : Name of the JobQueue subclass to instantiate
	 *   - wiki     : Wiki ID of the wiki the jobs are for
	 *   - type     : The name of the job types this queue handles
	 *   - order    : Order that pop() selects jobs, one of "fifo", "timestamp" or "random".
	 *                If "fifo" is used, the queue will effectively be FIFO. Note that
	 *                job completion will not appear to be exactly FIFO if there are multiple
	 *                job runners since jobs can take different times to finish once popped.
	 *                If "timestamp" is used, the queue will at least be loosely ordered
	 *                by timestamp, allowing for some jobs to be popped off out of order.
	 *                If "random" is used, pop() will pick jobs in random order. This might be
	 *                useful for improving concurrency depending on the queue storage medium.
	 *   - claimTTL : If supported, the queue will recycle jobs that have been popped
	 *                but not acknowledged as completed after this many seconds.
	 *
	 * Queue classes should throw an exception if they do not support the options given.
	 *
	 * @param $params array
	 * @return JobQueue
	 * @throws MWException If the class does not exist or is not a JobQueue
	 */
	final public static function factory( array $params ) {
		$class = $params['class'];
		if ( !MWInit::classExists( $class ) ) {
			throw new MWException( "Invalid job queue class '$class'." );
		}
		$obj = new $class( $params );
		if ( !( $obj instanceof self ) ) {
			throw new MWException( "Class '$class' is not a " . __CLASS__ . " class." );
		}
		return $obj;
	}

	/**
	 * @return string Wiki ID
	 */
	final public function getWiki() {
		return $this->wiki;
	}

	/**
	 * @return string Job type that this queue handles
	 */
	final public function getType() {
		return $this->type;
	}

	/**
	 * Quickly check if the queue is empty (has no available jobs).
	 * Queue classes should use caching if they are any slower without memcached.
	 *
	 * @return bool
	 */
	final public function isEmpty() {
		wfProfileIn( __METHOD__ );
		$res = $this->doIsEmpty();
		wfProfileOut( __METHOD__ );
		return $res;
	}

	/**
	 * @see JobQueue::isEmpty()
	 * @return bool
	 */
	abstract protected function doIsEmpty();

	/**
	 * Get the number of available jobs in the queue.
	 * Queue classes should use caching if they are any slower without memcached.
	 *
	 * @return integer
	 */
	final public function getSize() {
		wfProfileIn( __METHOD__ );
		$res = $this->doGetSize();
		wfProfileOut( __METHOD__ );
		return $res;
	}

	/**
	 * @see JobQueue::getSize()
	 * @return integer
	 */
	abstract protected function doGetSize();

	/**
	 * Get the number of acquired jobs (these are temporarily out of the queue).
	 * Queue classes should use caching if they are any slower without memcached.
	 *
	 * @return integer
	 */
	final public function getAcquiredCount() {
		wfProfileIn( __METHOD__ );
		$res = $this->doGetAcquiredCount();
		wfProfileOut( __METHOD__ );
		return $res;
	}

	/**
	 * @see JobQueue::getAcquiredCount()
	 * @return integer
	 */
	abstract protected function doGetAcquiredCount();

	/**
	 * Push a batch of jobs into the queue.
	 *
	 * An empty batch is a no-op that reports success without touching the
	 * storage backend. All items are validated before anything is pushed.
	 *
	 * @param $jobs array List of Jobs
	 * @param $flags integer Bitfield (supports JobQueue::QoS_Atomic)
	 * @throws MWException If an item is not a Job or has the wrong job type
	 * @return bool
	 */
	final public function batchPush( array $jobs, $flags = 0 ) {
		if ( !count( $jobs ) ) {
			return true; // nothing to do
		}
		// Validation happens before wfProfileIn() so that a thrown exception
		// cannot leave the profiler with an unmatched section.
		foreach ( $jobs as $job ) {
			if ( !( $job instanceof Job ) ) {
				// Fail with a catchable exception rather than a fatal
				// "member function on a non-object" error.
				throw new MWException( "Got a non-Job item; expected '{$this->type}' jobs." );
			}
			$this->assertJobType( $job );
		}
		wfProfileIn( __METHOD__ );
		$ok = $this->doBatchPush( $jobs, $flags );
		wfProfileOut( __METHOD__ );
		return $ok;
	}

	/**
	 * @see JobQueue::batchPush()
	 * @param $jobs array List of Jobs
	 * @param $flags integer Bitfield (supports JobQueue::QoS_Atomic)
	 * @return bool
	 */
	abstract protected function doBatchPush( array $jobs, $flags );

	/**
	 * Pop a job off of the queue
	 *
	 * @return Job|bool Returns false on failure
	 */
	final public function pop() {
		wfProfileIn( __METHOD__ );
		$job = $this->doPop();
		wfProfileOut( __METHOD__ );
		return $job;
	}

	/**
	 * @see JobQueue::pop()
	 * @return Job|bool Returns false on failure
	 */
	abstract protected function doPop();

	/**
	 * Acknowledge that a job was completed.
	 *
	 * This does nothing for certain queue classes or if "claimTTL" is not set.
	 *
	 * @param $job Job
	 * @throws MWException If the job has the wrong job type
	 * @return bool
	 */
	final public function ack( Job $job ) {
		$this->assertJobType( $job );
		wfProfileIn( __METHOD__ );
		$ok = $this->doAck( $job );
		wfProfileOut( __METHOD__ );
		return $ok;
	}

	/**
	 * @see JobQueue::ack()
	 * @param $job Job
	 * @return bool
	 */
	abstract protected function doAck( Job $job );

	/**
	 * Register the "root job" of a given job into the queue for de-duplication.
	 * This should only be called right *after* all the new jobs have been inserted.
	 * This is used to turn older, duplicate, job entries into no-ops. The root job
	 * information will remain in the registry until it simply falls out of cache.
	 *
	 * This requires that $job has two special fields in the "params" array:
	 *   - rootJobSignature : hash (e.g. SHA1) that identifies the task
	 *   - rootJobTimestamp : TS_MW timestamp of this instance of the task
	 *
	 * A "root job" is a conceptual job that consists of potentially many smaller jobs
	 * that are actually inserted into the queue. For example, "refreshLinks" jobs are
	 * spawned when a template is edited. One can think of the task as "update links
	 * of pages that use template X" and an instance of that task as a "root job".
	 * However, what actually goes into the queue are potentially many refreshLinks2 jobs.
	 * Since these jobs include things like page ID ranges and DB master positions, and morph
	 * into smaller refreshLinks2 jobs recursively, simple duplicate detection (like job_sha1)
	 * for individual jobs being identical is not useful.
	 *
	 * In the case of "refreshLinks", if these jobs are still in the queue when the template
	 * is edited again, we want all of these old refreshLinks jobs for that template to become
	 * no-ops. This can greatly reduce server load, since refreshLinks jobs involve parsing.
	 * Essentially, the new batch of jobs belongs to a new "root job" and the older ones to a
	 * previous "root job" for the same task of "update links of pages that use template X".
	 *
	 * This does nothing for certain queue classes.
	 *
	 * @param $job Job
	 * @throws MWException If the job has the wrong job type
	 * @return bool
	 */
	final public function deduplicateRootJob( Job $job ) {
		$this->assertJobType( $job );
		wfProfileIn( __METHOD__ );
		$ok = $this->doDeduplicateRootJob( $job );
		wfProfileOut( __METHOD__ );
		return $ok;
	}

	/**
	 * Default implementation: does nothing and reports success.
	 *
	 * @see JobQueue::deduplicateRootJob()
	 * @param $job Job
	 * @return bool
	 */
	protected function doDeduplicateRootJob( Job $job ) {
		return true;
	}

	/**
	 * Wait for any slaves or backup servers to catch up.
	 *
	 * This does nothing for certain queue classes.
	 *
	 * @return void
	 */
	final public function waitForBackups() {
		wfProfileIn( __METHOD__ );
		$this->doWaitForBackups();
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Default implementation: does nothing.
	 *
	 * @see JobQueue::waitForBackups()
	 * @return void
	 */
	protected function doWaitForBackups() {}

	/**
	 * Check that a job has the job type that this queue handles.
	 *
	 * @param $job Job
	 * @throws MWException If the job type differs from this queue's type
	 * @return void
	 */
	private function assertJobType( Job $job ) {
		if ( $job->getType() !== $this->type ) {
			throw new MWException( "Got '{$job->getType()}' job; expected '{$this->type}'." );
		}
	}
}