Fix some IDEA errors in BacklinkJobUtils
[lhc/web/wiklou.git] / includes / jobqueue / utils / BacklinkJobUtils.php
1 <?php
2 /**
3 * Helper methods for jobs that act on the backlinks of a given title.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup JobQueue
22 * @author Aaron Schulz
23 */
24
/**
 * Class with Backlink related Job helper methods
 *
 * @ingroup JobQueue
 * @since 1.23
 */
class BacklinkJobUtils {
	/**
	 * Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition
	 * job that covers the remaining backlink range (if needed). Jobs for the first $bSize
	 * titles are collated ($cSize per job) into leaf jobs to do actual work. All the
	 * resulting jobs are of the same class as $job. No partition job is returned if the
	 * range covered by $job was less than $bSize, as the leaf jobs have full coverage.
	 *
	 * The leaf jobs have the 'pages' param set to a (<page ID>:(<namespace>,<DB key>),...)
	 * map so that the run() function knows what pages to act on. The leaf jobs will keep
	 * the same job title as the parent job (e.g. $job).
	 *
	 * The partition jobs have the 'range' parameter set to a map of the format
	 * (start:<integer>, end:<integer>, batchSize:<integer>, subranges:((<start>,<end>),...)),
	 * the 'table' parameter set to that of $job, and the 'recursive' parameter set to true.
	 * This method can be called on the resulting job to repeat the process again.
	 *
	 * The job provided ($job) must have the 'recursive' parameter set to true and the 'table'
	 * parameter must be set to a backlink table. The job title will be used as the title to
	 * find backlinks for. Any 'range' parameter must follow the same format as mentioned above.
	 * This should be managed by recursive calls to this method.
	 *
	 * If $job is already a leaf job (it has a 'pages' parameter or is not 'recursive'),
	 * a warning is raised via wfWarn() and an empty list is returned.
	 *
	 * The first jobs returned are always the leaf jobs. This lets the caller use push() to
	 * put them directly into the queue and works well if the queue is FIFO. In such a queue,
	 * the leaf jobs have to get finished first before anything can resolve the next partition
	 * job, which keeps the queue very small.
	 *
	 * $opts includes:
	 *   - params : extra job parameters to include in each job
	 *
	 * @param Job $job
	 * @param int $bSize BacklinkCache partition size; usually $wgUpdateRowsPerJob
	 * @param int $cSize Max titles per leaf job; Usually 1 or a modest value.
	 *   Values below 1 are treated as 1.
	 * @param array $opts Optional parameter map
	 * @return Job[] List of Job objects
	 */
	public static function partitionBacklinkJob( Job $job, $bSize, $cSize, $opts = array() ) {
		$class = get_class( $job );
		$title = $job->getTitle();
		$params = $job->getParams();

		if ( isset( $params['pages'] ) || empty( $params['recursive'] ) ) {
			// Sanity; this is a leaf node, so there is nothing left to partition
			wfWarn( __METHOD__ . " called on {$job->getType()} leaf job (explosive recursion)." );

			return array();
		}

		$backlinkCache = $title->getBacklinkCache();

		if ( isset( $params['range'] ) ) {
			// This is a range job to trigger the insertion of partitioned/title jobs...
			$ranges = $params['range']['subranges'];
			$realBSize = $params['range']['batchSize'];
		} else {
			// This is a base job to trigger the insertion of partitioned jobs...
			$ranges = $backlinkCache->partition( $params['table'], $bSize );
			$realBSize = $bSize;
		}

		$extraParams = isset( $opts['params'] ) ? $opts['params'] : array();

		// array_chunk() rejects chunk sizes below 1, so never let a bad value reach it
		$cSize = max( 1, (int)$cSize );

		$jobs = array();
		// Combine the first range (of size $bSize) backlinks into leaf jobs
		if ( isset( $ranges[0] ) ) {
			list( $start, $end ) = $ranges[0];
			$iter = $backlinkCache->getLinks( $params['table'], $start, $end );
			$titles = iterator_to_array( $iter );
			/** @var Title[] $titleBatch */
			foreach ( array_chunk( $titles, $cSize ) as $titleBatch ) {
				$pages = array();
				foreach ( $titleBatch as $tl ) {
					$pages[$tl->getArticleID()] = array( $tl->getNamespace(), $tl->getDBKey() );
				}
				$jobs[] = new $class(
					$title, // maintain parent job title
					array( 'pages' => $pages ) + $extraParams
				);
			}
		}
		// Take all of the remaining ranges and build a partition job from it
		if ( isset( $ranges[1] ) ) {
			$jobs[] = new $class(
				$title, // maintain parent job title
				array(
					'recursive' => true,
					'table' => $params['table'],
					'range' => array(
						'start' => $ranges[1][0],
						'end' => $ranges[count( $ranges ) - 1][1],
						'batchSize' => $realBSize,
						'subranges' => array_slice( $ranges, 1 )
					),
				) + $extraParams
			);
		}

		return $jobs;
	}
}