Merge "(bug 35923) tweaks to mediawiki.action.history.diff.css"
[lhc/web/wiklou.git] / includes / cache / HTMLCacheUpdate.php
1 <?php
2 /**
3 * HTML cache invalidation of all pages linking to a given title.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Cache
22 */
23
24 /**
25 * Class to invalidate the HTML cache of all the pages linking to a given title.
26 * Small numbers of links will be done immediately, large numbers are pushed onto
27 * the job queue.
28 *
29 * This class is designed to work efficiently with small numbers of links, and
30 * to work reasonably well with up to ~10^5 links. Above ~10^6 links, the memory
31 * and time requirements of loading all backlinked IDs in doUpdate() might become
32 * prohibitive. The requirements measured at Wikimedia are approximately:
33 *
34 * memory: 48 bytes per row
35 * time: 16us per row for the query plus processing
36 *
37 * The reason this query is done is to support partitioning of the job
38 * by backlinked ID. The memory issue could be allieviated by doing this query in
39 * batches, but of course LIMIT with an offset is inefficient on the DB side.
40 *
41 * The class is nevertheless a vast improvement on the previous method of using
42 * File::getLinksTo() and Title::touchArray(), which uses about 2KB of memory per
43 * link.
44 *
45 * @ingroup Cache
46 */
class HTMLCacheUpdate implements DeferrableUpdate {
	/**
	 * @var Title Title whose backlinks are to be invalidated
	 */
	public $mTitle;

	/**
	 * @var BacklinkCache Backlink information for $mTitle. Declared
	 *  explicitly so it is not created as a dynamic property (dynamic
	 *  property creation is deprecated as of PHP 8.2).
	 */
	public $mCache;

	public $mTable, $mPrefix, $mStart, $mEnd;
	public $mRowsPerJob, $mRowsPerQuery;

	/**
	 * @param $titleTo Title Title whose backlinks should be invalidated
	 * @param $table string Name of the backlink table (e.g. 'templatelinks')
	 * @param $start bool|int False, or the lowest page ID of a partial update range
	 * @param $end bool|int False, or the highest page ID of a partial update range
	 */
	function __construct( $titleTo, $table, $start = false, $end = false ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		$this->mTitle = $titleTo;
		$this->mTable = $table;
		$this->mStart = $start;
		$this->mEnd = $end;
		# Batch sizes come from site configuration so they can be tuned
		# per-wiki without touching this class.
		$this->mRowsPerJob = $wgUpdateRowsPerJob;
		$this->mRowsPerQuery = $wgUpdateRowsPerQuery;
		$this->mCache = $this->mTitle->getBacklinkCache();
	}

	/**
	 * Invalidate the backlinks: immediately if there are few enough of them,
	 * otherwise by pushing HTMLCacheUpdateJob entries onto the job queue.
	 */
	public function doUpdate() {
		if ( $this->mStart || $this->mEnd ) {
			# We were given an explicit ID range, i.e. we are running as a
			# (partial) job rather than as the initial update.
			$this->doPartialUpdate();
			return;
		}

		# Get an estimate of the number of rows from the BacklinkCache
		$numRows = $this->mCache->getNumLinks( $this->mTable );
		if ( $numRows > $this->mRowsPerJob * 2 ) {
			# Do fast cached partition
			$this->insertJobs();
		} else {
			# Get the links from the DB
			$titleArray = $this->mCache->getLinks( $this->mTable );
			# Check if the row count estimate was correct
			if ( $titleArray->count() > $this->mRowsPerJob * 2 ) {
				# Not correct, do accurate partition
				wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" );
				$this->insertJobsFromTitles( $titleArray );
			} else {
				$this->invalidateTitles( $titleArray );
			}
		}
	}

	/**
	 * Update some of the backlinks, defined by a page ID range
	 * ($this->mStart .. $this->mEnd).
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->mCache->getLinks( $this->mTable, $this->mStart, $this->mEnd );
		if ( $titleArray->count() <= $this->mRowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}
	}

	/**
	 * Partition the current range given by $this->mStart and $this->mEnd,
	 * using a pre-calculated title array which gives the links in that range.
	 * Queue the resulting jobs.
	 *
	 * @param $titleArray array|Traversable Iterable of Title objects covering
	 *  the current range, in page ID order
	 */
	protected function insertJobsFromTitles( $titleArray ) {
		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		$jobs = array();
		$start = $this->mStart; # start of the current job
		$numTitles = 0;
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->mRowsPerJob ) {
				# Add a job up to but not including the current ID
				$params = array(
					'table' => $this->mTable,
					'start' => $start,
					'end' => $id - 1
				);
				$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}
		# Last job: runs to the end of the parent partition, so the union of
		# all subjobs exactly covers [mStart, mEnd].
		$params = array(
			'table' => $this->mTable,
			'start' => $start,
			'end' => $this->mEnd
		);
		$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__.": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
			return;
		}

		Job::batchInsert( $jobs );
	}

	/**
	 * Partition the full backlink set using the BacklinkCache's cached
	 * partitioning, and queue one HTMLCacheUpdateJob per partition.
	 */
	protected function insertJobs() {
		$batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob );
		if ( !$batches ) {
			# No backlinks at all; nothing to queue
			return;
		}
		$jobs = array();
		foreach ( $batches as $batch ) {
			$params = array(
				'table' => $this->mTable,
				'start' => $batch[0],
				'end' => $batch[1],
			);
			$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		}
		Job::batchInsert( $jobs );
	}

	/**
	 * Invalidate an array (or iterator) of Title objects, right now
	 * @param $titleArray array|Traversable Title objects to invalidate
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		# Collect all page IDs first so we can bail out before touching the
		# master DB when there is nothing to do.
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		if ( !$ids ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$timestamp = $dbw->timestamp();

		# Update page_touched, chunked to keep each UPDATE's IN list bounded
		$batches = array_chunk( $ids, $this->mRowsPerQuery );
		foreach ( $batches as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id' => $batch ),
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}
}