ApiQueryDeletedRevisions: Optimize ascending title generation
[lhc/web/wiklou.git] / includes / api / ApiQueryAllDeletedRevisions.php
1 <?php
2 /**
3 * Created on Oct 3, 2014
4 *
5 * Copyright © 2014 Brad Jorsch "bjorsch@wikimedia.org"
6 *
7 * Heavily based on ApiQueryDeletedrevs,
8 * Copyright © 2007 Roan Kattouw "<Firstname>.<Lastname>@gmail.com"
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 * http://www.gnu.org/copyleft/gpl.html
24 *
25 * @file
26 */
27
28 /**
29 * Query module to enumerate all deleted revisions.
30 *
31 * @ingroup API
32 */
33 class ApiQueryAllDeletedRevisions extends ApiQueryRevisionsBase {
34
/**
 * Set up the module, handing the 'adr' prefix to the parent constructor
 * as its third argument (the example queries in this class use 'adr' as
 * the prefix of every module parameter).
 *
 * @param ApiQuery $query
 * @param string $moduleName
 */
public function __construct( ApiQuery $query, $moduleName ) {
	$paramPrefix = 'adr';

	parent::__construct( $query, $moduleName, $paramPrefix );
}
38
/**
 * Enumerate rows of the 'archive' table (deleted revisions).
 *
 * Two modes exist: 'user' (the 'user' parameter is set; rows are ordered
 * by timestamp) and 'all' (rows are ordered by namespace and title).
 * When $resultPageSet is null the revisions are added to the API result
 * grouped by page; otherwise the page set is populated with either titles
 * (if 'generatetitles' is set) or revision IDs.
 *
 * Dies with a usage error if the user lacks 'deletedhistory', if content
 * is requested without 'undelete'/'deletedtext', if parameters that do not
 * belong to the active mode are supplied, or if 'continue' is malformed.
 *
 * @param ApiPageSet|null $resultPageSet Page set to populate, or null to
 *   write revision data into the API result instead
 * @return void
 */
protected function run( ApiPageSet $resultPageSet = null ) {
	$user = $this->getUser();
	// Before doing anything at all, let's check permissions
	if ( !$user->isAllowed( 'deletedhistory' ) ) {
		$this->dieUsage(
			'You don\'t have permission to view deleted revision information',
			'permissiondenied'
		);
	}

	$db = $this->getDB();
	$params = $this->extractRequestParams( false );

	$result = $this->getResult();

	// This module operates in two modes:
	// 'user': List deleted revs by a certain user
	// 'all': List all deleted revs in NS
	$mode = ( $params['user'] !== null ) ? 'user' : 'all';

	// Reject parameters that only make sense in the other mode
	if ( $mode === 'user' ) {
		foreach ( array( 'from', 'to', 'prefix', 'excludeuser' ) as $param ) {
			if ( $params[$param] !== null ) {
				$p = $this->getModulePrefix();
				$this->dieUsage( "The '{$p}{$param}' parameter cannot be used with '{$p}user'",
					'badparams' );
			}
		}
	} else {
		foreach ( array( 'start', 'end' ) as $param ) {
			if ( $params[$param] !== null ) {
				$p = $this->getModulePrefix();
				$this->dieUsage( "The '{$p}{$param}' parameter may only be used with '{$p}user'",
					'badparams' );
			}
		}
	}

	// If we're generating titles only, we can use DISTINCT for a better
	// query. But we can't do that in 'user' mode (wrong index), and we can
	// only do it when sorting ASC (because MySQL apparently can't use an
	// index backwards for grouping even though it can for ORDER BY, WTF?)
	$dir = $params['dir'];
	$optimizeGenerateTitles = false;
	if ( $mode === 'all' && $params['generatetitles'] && $resultPageSet !== null ) {
		if ( $dir === 'newer' ) {
			$optimizeGenerateTitles = true;
		} else {
			$p = $this->getModulePrefix();
			$this->setWarning( "For better performance when generating titles, set {$p}dir=newer" );
		}
	}

	$this->addTables( 'archive' );
	if ( $resultPageSet === null ) {
		$this->parseParameters( $params );
		$this->addFields( Revision::selectArchiveFields() );
		$this->addFields( array( 'ar_title', 'ar_namespace' ) );
	} else {
		$this->limit = $this->getParameter( 'limit' ) ?: 10;
		$this->addFields( array( 'ar_title', 'ar_namespace' ) );
		if ( $optimizeGenerateTitles ) {
			$this->addOption( 'DISTINCT' );
		} else {
			// These columns are needed for ordering/continuation, and
			// ar_rev_id for populating the page set from revision IDs.
			$this->addFields( array( 'ar_timestamp', 'ar_rev_id', 'ar_id' ) );
		}
	}

	if ( $this->fld_tags ) {
		$this->addTables( 'tag_summary' );
		$this->addJoinConds(
			array( 'tag_summary' => array( 'LEFT JOIN', array( 'ar_rev_id=ts_rev_id' ) ) )
		);
		$this->addFields( 'ts_tags' );
	}

	if ( $params['tag'] !== null ) {
		$this->addTables( 'change_tag' );
		$this->addJoinConds(
			array( 'change_tag' => array( 'INNER JOIN', array( 'ar_rev_id=ct_rev_id' ) ) )
		);
		$this->addWhereFld( 'ct_tag', $params['tag'] );
	}

	if ( $this->fetchContent ) {
		// Modern MediaWiki has the content for deleted revs in the 'text'
		// table using fields old_text and old_flags. But revisions deleted
		// pre-1.5 store the content in the 'archive' table directly using
		// fields ar_text and ar_flags, and no corresponding 'text' row. So
		// we have to LEFT JOIN and fetch all four fields.
		$this->addTables( 'text' );
		$this->addJoinConds(
			array( 'text' => array( 'LEFT JOIN', array( 'ar_text_id=old_id' ) ) )
		);
		$this->addFields( array( 'ar_text', 'ar_flags', 'old_text', 'old_flags' ) );

		// This also means stricter restrictions
		if ( !$user->isAllowedAny( 'undelete', 'deletedtext' ) ) {
			$this->dieUsage(
				'You don\'t have permission to view deleted revision content',
				'permissiondenied'
			);
		}
	}

	// Namespaces to filter on in PHP rather than in SQL (set only in
	// 'user' mode when the MiserMode configuration setting is on).
	$miser_ns = null;

	if ( $mode === 'all' ) {
		if ( $params['namespace'] !== null ) {
			$namespaces = $params['namespace'];
			$this->addWhereFld( 'ar_namespace', $namespaces );
		} else {
			$namespaces = MWNamespace::getValidNamespaces();
		}

		// For from/to/prefix, we have to consider the potential
		// transformations of the title in all specified namespaces.
		// Generally there will be only one transformation, but wikis with
		// some namespaces case-sensitive could have two.
		if ( $params['from'] !== null || $params['to'] !== null ) {
			$isDirNewer = ( $dir === 'newer' );
			$after = ( $isDirNewer ? '>=' : '<=' );
			$before = ( $isDirNewer ? '<=' : '>=' );
			$where = array();
			foreach ( $namespaces as $ns ) {
				$w = array();
				if ( $params['from'] !== null ) {
					$w[] = 'ar_title' . $after .
						$db->addQuotes( $this->titlePartToKey( $params['from'], $ns ) );
				}
				if ( $params['to'] !== null ) {
					$w[] = 'ar_title' . $before .
						$db->addQuotes( $this->titlePartToKey( $params['to'], $ns ) );
				}
				$w = $db->makeList( $w, LIST_AND );
				// Group namespaces by the SQL condition they produce
				$where[$w][] = $ns;
			}
			$this->addNamespaceGroupedConds( $db, $where );
		}

		if ( isset( $params['prefix'] ) ) {
			$where = array();
			foreach ( $namespaces as $ns ) {
				$w = 'ar_title' . $db->buildLike(
					$this->titlePartToKey( $params['prefix'], $ns ),
					$db->anyString() );
				$where[$w][] = $ns;
			}
			$this->addNamespaceGroupedConds( $db, $where );
		}
	} else {
		if ( $this->getConfig()->get( 'MiserMode' ) ) {
			$miser_ns = $params['namespace'];
		} else {
			$this->addWhereFld( 'ar_namespace', $params['namespace'] );
		}
		$this->addTimestampWhereRange( 'ar_timestamp', $dir, $params['start'], $params['end'] );
	}

	if ( $params['user'] !== null ) {
		$this->addWhereFld( 'ar_user_text', $params['user'] );
	} elseif ( $params['excludeuser'] !== null ) {
		$this->addWhere( 'ar_user_text != ' .
			$db->addQuotes( $params['excludeuser'] ) );
	}

	if ( $params['user'] !== null || $params['excludeuser'] !== null ) {
		// Paranoia: avoid brute force searches (bug 17342)
		// (shouldn't be able to get here without 'deletedhistory', but
		// check it again just in case)
		if ( !$user->isAllowed( 'deletedhistory' ) ) {
			$bitmask = Revision::DELETED_USER;
		} elseif ( !$user->isAllowedAny( 'suppressrevision', 'viewsuppressed' ) ) {
			$bitmask = Revision::DELETED_USER | Revision::DELETED_RESTRICTED;
		} else {
			$bitmask = 0;
		}
		if ( $bitmask ) {
			$this->addWhere( $db->bitAnd( 'ar_deleted', $bitmask ) . " != $bitmask" );
		}
	}

	if ( $params['continue'] !== null ) {
		// The continue value is a '|'-separated tuple whose shape matches
		// what makeContinueValue() emits for the active mode.
		$cont = explode( '|', $params['continue'] );
		$op = ( $dir === 'newer' ? '>' : '<' );
		if ( $optimizeGenerateTitles ) {
			$this->dieContinueUsageIf( count( $cont ) !== 2 );
			$ns = intval( $cont[0] );
			$this->dieContinueUsageIf( strval( $ns ) !== $cont[0] );
			$title = $db->addQuotes( $cont[1] );
			$this->addWhere( "ar_namespace $op $ns OR " .
				"(ar_namespace = $ns AND ar_title $op= $title)" );
		} elseif ( $mode === 'all' ) {
			$this->dieContinueUsageIf( count( $cont ) !== 4 );
			$ns = intval( $cont[0] );
			$this->dieContinueUsageIf( strval( $ns ) !== $cont[0] );
			$title = $db->addQuotes( $cont[1] );
			$ts = $db->addQuotes( $db->timestamp( $cont[2] ) );
			$ar_id = (int)$cont[3];
			$this->dieContinueUsageIf( strval( $ar_id ) !== $cont[3] );
			$this->addWhere( "ar_namespace $op $ns OR " .
				"(ar_namespace = $ns AND " .
				"(ar_title $op $title OR " .
				"(ar_title = $title AND " .
				"(ar_timestamp $op $ts OR " .
				"(ar_timestamp = $ts AND " .
				"ar_id $op= $ar_id)))))" );
		} else {
			$this->dieContinueUsageIf( count( $cont ) !== 2 );
			$ts = $db->addQuotes( $db->timestamp( $cont[0] ) );
			$ar_id = (int)$cont[1];
			$this->dieContinueUsageIf( strval( $ar_id ) !== $cont[1] );
			$this->addWhere( "ar_timestamp $op $ts OR " .
				"(ar_timestamp = $ts AND " .
				"ar_id $op= $ar_id)" );
		}
	}

	// Fetch one extra row so we can tell whether there is more to continue from
	$this->addOption( 'LIMIT', $this->limit + 1 );

	$sort = ( $dir === 'newer' ? '' : ' DESC' );
	$orderby = array();
	if ( $mode === 'all' ) {
		// Targeting index name_title_timestamp
		// ($optimizeGenerateTitles can only be true in this mode.)
		if ( $params['namespace'] === null || count( array_unique( $params['namespace'] ) ) > 1 ) {
			$orderby[] = "ar_namespace $sort";
		}
		$orderby[] = "ar_title $sort";
		if ( !$optimizeGenerateTitles ) {
			$orderby[] = "ar_timestamp $sort";
			$orderby[] = "ar_id $sort";
		}
	} else {
		// Targeting index usertext_timestamp
		// 'user' is always constant.
		$orderby[] = "ar_timestamp $sort";
		$orderby[] = "ar_id $sort";
	}
	$this->addOption( 'ORDER BY', $orderby );

	$res = $this->select( __METHOD__ );
	$pageMap = array(); // Maps ns&title to array index
	$count = 0;
	$nextIndex = 0;
	$generated = array();
	foreach ( $res as $row ) {
		if ( ++$count > $this->limit ) {
			// We've had enough
			$this->setContinueEnumParameter( 'continue',
				$this->makeContinueValue( $row, $mode, $optimizeGenerateTitles ) );
			break;
		}

		// Miser mode namespace check
		// NOTE(review): loose in_array() kept on purpose; the row value and
		// the parameter values may differ in type (string vs int) - confirm
		// before making this strict.
		if ( $miser_ns !== null && !in_array( $row->ar_namespace, $miser_ns ) ) {
			continue;
		}

		if ( $resultPageSet !== null ) {
			if ( $params['generatetitles'] ) {
				// Keyed by namespace:title so each page is generated once
				$key = "{$row->ar_namespace}:{$row->ar_title}";
				if ( !isset( $generated[$key] ) ) {
					$generated[$key] = Title::makeTitle( $row->ar_namespace, $row->ar_title );
				}
			} else {
				$generated[] = $row->ar_rev_id;
			}
		} else {
			$revision = Revision::newFromArchiveRow( $row );
			$rev = $this->extractRevisionInfo( $revision, $row );

			if ( !isset( $pageMap[$row->ar_namespace][$row->ar_title] ) ) {
				// First revision seen for this page: create its entry
				$index = $nextIndex++;
				$pageMap[$row->ar_namespace][$row->ar_title] = $index;
				$title = $revision->getTitle();
				$a = array(
					'pageid' => $title->getArticleID(),
					'revisions' => array( $rev ),
				);
				ApiResult::setIndexedTagName( $a['revisions'], 'rev' );
				ApiQueryBase::addTitleInfo( $a, $title );
				$fit = $result->addValue( array( 'query', $this->getModuleName() ), $index, $a );
			} else {
				// Append to the page entry created earlier
				$index = $pageMap[$row->ar_namespace][$row->ar_title];
				$fit = $result->addValue(
					array( 'query', $this->getModuleName(), $index, 'revisions' ),
					null, $rev );
			}
			if ( !$fit ) {
				// This branch is only reached with a null $resultPageSet, where
				// $optimizeGenerateTitles is always false, so the value has the
				// same shape the continue parser above expects.
				$this->setContinueEnumParameter( 'continue',
					$this->makeContinueValue( $row, $mode, $optimizeGenerateTitles ) );
				break;
			}
		}
	}

	if ( $resultPageSet !== null ) {
		if ( $params['generatetitles'] ) {
			$resultPageSet->populateFromTitles( $generated );
		} else {
			$resultPageSet->populateFromRevisionIDs( $generated );
		}
	} else {
		$result->addIndexedTagName( array( 'query', $this->getModuleName() ), 'page' );
	}
}

/**
 * Add title conditions that were grouped by the namespaces they apply to.
 *
 * If every namespace produced the same condition it is added on its own;
 * otherwise each condition is ANDed with its namespace list and the
 * alternatives are ORed together.
 *
 * @param object $db Database handle as returned by getDB()
 * @param array $groups Map of SQL condition string => list of namespace IDs
 * @return void
 */
private function addNamespaceGroupedConds( $db, array $groups ) {
	if ( count( $groups ) === 1 ) {
		$this->addWhere( key( $groups ) );
		return;
	}

	$alternatives = array();
	foreach ( $groups as $cond => $nsList ) {
		$alternatives[] = $db->makeList( array( $cond, 'ar_namespace' => $nsList ), LIST_AND );
	}
	$this->addWhere( $db->makeList( $alternatives, LIST_OR ) );
}

/**
 * Build the '|'-separated continue value for a result row.
 *
 * @param object $row Row from the archive query
 * @param string $mode Either 'all' or 'user'
 * @param bool $titlesOnly Whether the DISTINCT title-only query is in use
 * @return string "ns|title" when $titlesOnly, "ns|title|timestamp|id" in
 *   'all' mode, otherwise "timestamp|id"
 */
private function makeContinueValue( $row, $mode, $titlesOnly ) {
	if ( $titlesOnly ) {
		return "$row->ar_namespace|$row->ar_title";
	}
	if ( $mode === 'all' ) {
		return "$row->ar_namespace|$row->ar_title|$row->ar_timestamp|$row->ar_id";
	}

	return "$row->ar_timestamp|$row->ar_id";
}
388
/**
 * Parameter definitions for this module: whatever the parent class
 * defines, plus the module-specific parameters declared here (the parent's
 * definition wins if a name is defined in both).
 *
 * When the MiserMode configuration setting is on, an extra help message
 * is appended to the 'user' and 'namespace' parameters.
 *
 * @return array
 */
public function getAllowedParams() {
	// Help-info markers shared by several parameters below
	$userOnly = array( array( 'useronly' ) );
	$nonUserOnly = array( array( 'nonuseronly' ) );

	$own = array(
		'user' => array(
			ApiBase::PARAM_TYPE => 'user'
		),
		'namespace' => array(
			ApiBase::PARAM_ISMULTI => true,
			ApiBase::PARAM_TYPE => 'namespace',
			ApiBase::PARAM_DFLT => null,
		),
		'start' => array(
			ApiBase::PARAM_TYPE => 'timestamp',
			ApiBase::PARAM_HELP_MSG_INFO => $userOnly,
		),
		'end' => array(
			ApiBase::PARAM_TYPE => 'timestamp',
			ApiBase::PARAM_HELP_MSG_INFO => $userOnly,
		),
		'dir' => array(
			ApiBase::PARAM_TYPE => array(
				'newer',
				'older'
			),
			ApiBase::PARAM_DFLT => 'older',
			ApiBase::PARAM_HELP_MSG => 'api-help-param-direction',
		),
		'from' => array(
			ApiBase::PARAM_HELP_MSG_INFO => $nonUserOnly,
		),
		'to' => array(
			ApiBase::PARAM_HELP_MSG_INFO => $nonUserOnly,
		),
		'prefix' => array(
			ApiBase::PARAM_HELP_MSG_INFO => $nonUserOnly,
		),
		'excludeuser' => array(
			ApiBase::PARAM_TYPE => 'user',
			ApiBase::PARAM_HELP_MSG_INFO => $nonUserOnly,
		),
		'tag' => null,
		'continue' => array(
			ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
		),
		'generatetitles' => array(
			ApiBase::PARAM_DFLT => false
		),
	);

	$ret = parent::getAllowedParams() + $own;

	if ( $this->getConfig()->get( 'MiserMode' ) ) {
		$miserNote = array(
			'apihelp-query+alldeletedrevisions-param-miser-user-namespace',
		);
		foreach ( array( 'user', 'namespace' ) as $paramName ) {
			$ret[$paramName][ApiBase::PARAM_HELP_MSG_APPEND] = $miserNote;
		}
	}

	return $ret;
}
448
/**
 * Example queries for the help output.
 *
 * @return array Map of example query string => help message key
 */
protected function getExamplesMessages() {
	return array(
		'action=query&list=alldeletedrevisions&adruser=Example&adrlimit=50'
			=> 'apihelp-query+alldeletedrevisions-example-user',
		// The 'namespace' parameter defaults to null, which applies no
		// namespace condition in 'all' mode, so the main-namespace example
		// has to request namespace 0 explicitly.
		// NOTE(review): assumes the '...-example-ns-main' message describes
		// the main namespace, as its key suggests - confirm against i18n.
		'action=query&list=alldeletedrevisions&adrdir=newer&adrnamespace=0&adrlimit=50'
			=> 'apihelp-query+alldeletedrevisions-example-ns-main',
	);
}
457
/**
 * Location of the documentation page for this module.
 *
 * @return string
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/API:Alldeletedrevisions';

	return $helpUrl;
}
461 }