Merge "Improve docs for Title::getInternalURL/getCanonicalURL"
[lhc/web/wiklou.git] / includes / api / ApiStashEdit.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21 use MediaWiki\Logger\LoggerFactory;
22 use MediaWiki\MediaWikiServices;
23 use Wikimedia\ScopedCallback;
24
25 /**
26 * Prepare an edit in shared cache so that it can be reused on edit
27 *
28 * This endpoint can be called via AJAX as the user focuses on the edit
29 * summary box. By the time of submission, the parse may have already
30 * finished, and can be immediately used on page save. Certain parser
31 * functions like {{REVISIONID}} or {{CURRENTTIME}} may cause the cache
32 * to not be used on edit. Templates and files used are checked for changes
33 * since the output was generated. The cache TTL is also kept low for sanity.
34 *
35 * @ingroup API
36 * @since 1.25
37 */
38 class ApiStashEdit extends ApiBase {
39 const ERROR_NONE = 'stashed';
40 const ERROR_PARSE = 'error_parse';
41 const ERROR_CACHE = 'error_cache';
42 const ERROR_UNCACHEABLE = 'uncacheable';
43 const ERROR_BUSY = 'busy';
44
45 const PRESUME_FRESH_TTL_SEC = 30;
46 const MAX_CACHE_TTL = 300; // 5 minutes
47 const MAX_SIGNATURE_TTL = 60;
48
49 const MAX_CACHE_RECENT = 2;
50
/**
 * Stash a prepared edit for the requesting user.
 *
 * Resolves the submitted text (either taken from 'text', or loaded from the text
 * cache via 'stashedtexthash'), builds the content the page would have after the
 * edit (section replacement, plus a three-way merge when the base revision is not
 * the current one), then parses and stashes it unless the user is rate-limited.
 *
 * The module result carries a 'status' field and, unless the request was
 * rate-limited without a previously stashed hash, a 'texthash' field.
 */
public function execute() {
	$user = $this->getUser();
	$params = $this->extractRequestParams();

	// Sanity: bots are not supported by this module
	if ( $user->isBot() ) {
		$this->dieWithError( 'apierror-botsnotsupported' );
	}

	$cache = ObjectCache::getLocalClusterInstance();
	$title = $this->getTitleOrPageId( $params )->getTitle();

	$model = $params['contentmodel'];
	$format = $params['contentformat'];
	$modelHandler = ContentHandler::getForModelID( $model );
	if ( !$modelHandler->isSupportedFormat( $format ) ) {
		$this->dieWithError(
			[ 'apierror-badformat-generic', $format, $model ],
			'badmodelformat'
		);
	}

	$this->requireOnlyOneParameter( $params, 'stashedtexthash', 'text' );

	$priorHash = $params['stashedtexthash'];
	if ( $priorHash === null ) {
		// 'text' was passed. Trim and fix newlines so the key SHA1's
		// match (see WebRequest::getText())
		$text = rtrim( str_replace( "\r\n", "\n", $params['text'] ) );
		$textHash = sha1( $text );
	} else {
		// The client indicates the text is the same as its last stash: load it from cache
		if ( !preg_match( '/^[0-9a-f]{40}$/', $priorHash ) ) {
			$this->dieWithError( 'apierror-stashedit-missingtext', 'missingtext' );
		}
		$textHash = $priorHash;
		$text = $cache->get( $cache->makeKey( 'stashedit', 'text', $textHash ) );
		if ( !is_string( $text ) ) {
			$this->dieWithError( 'apierror-stashedit-missingtext', 'missingtext' );
		}
	}

	$textContent = ContentHandler::makeContent( $text, $title, $model, $format );

	$page = WikiPage::factory( $title );
	if ( !$page->exists() ) {
		// New pages: use the user-provided content model
		$content = $textContent;
	} else {
		// Page exists: get the merged content with the proposed change
		$baseRevId = $params['baserevid'];
		$baseRev = Revision::newFromPageId( $page->getId(), $baseRevId );
		if ( !$baseRev ) {
			$this->dieWithError( [ 'apierror-nosuchrevid', $baseRevId ] );
		}
		$currentRev = $page->getRevision();
		if ( !$currentRev ) {
			$this->dieWithError( [ 'apierror-missingrev-pageid', $page->getId() ], 'missingrev' );
		}

		// Merge in the new version of the section to get the proposed version
		$editContent = $page->replaceSectionAtRev(
			$params['section'],
			$textContent,
			$params['sectiontitle'],
			$baseRev->getId()
		);
		if ( !$editContent ) {
			$this->dieWithError( 'apierror-sectionreplacefailed', 'replacefailed' );
		}

		if ( $currentRev->getId() != $baseRev->getId() ) {
			// The page moved on since the base revision: merge the edit into the current version
			$baseContent = $baseRev->getContent();
			$currentContent = $currentRev->getContent();
			if ( !$baseContent || !$currentContent ) {
				$this->dieWithError( [ 'apierror-missingcontent-pageid', $page->getId() ], 'missingrev' );
			}
			$content = ContentHandler::getForModelID( $baseContent->getModel() )
				->merge3( $baseContent, $editContent, $currentContent );
		} else {
			// Base revision was still the latest; nothing to merge
			$content = $editContent;
		}
	}

	if ( !$content ) {
		// merge3() failed
		$this->getResult()->addValue( null,
			$this->getModuleName(), [ 'status' => 'editconflict' ] );
		return;
	}

	// The user will abort the AJAX request by pressing "save", so ignore that
	ignore_user_abort( true );

	if ( !$user->pingLimiter( 'stashedit' ) ) {
		$status = self::parseAndStash( $page, $content, $user, $params['summary'] );
		// Remember the raw text so a later request can refer to it by hash alone
		$cache->set(
			$cache->makeKey( 'stashedit', 'text', $textHash ),
			$text,
			self::MAX_CACHE_TTL
		);
	} else {
		$status = 'ratelimited';
	}

	MediaWikiServices::getInstance()->getStatsdDataFactory()
		->increment( "editstash.cache_stores.$status" );

	$result = [ 'status' => $status ];
	// If we were rate-limited, we still return the pre-existing valid hash if one was passed
	$withholdHash = ( $status === 'ratelimited' && $priorHash === null );
	if ( !$withholdHash ) {
		$result['texthash'] = $textHash;
	}

	$this->getResult()->addValue( null, $this->getModuleName(), $result );
}
164
/**
 * Parse the given content and store the result in the edit stash.
 *
 * A non-blocking named lock on the stash key de-duplicates concurrent requests
 * for the same key. A stash entry that is at most PRESUME_FRESH_TTL_SEC seconds
 * old is reused instead of parsing again.
 *
 * @param WikiPage $page
 * @param Content $content Edit content
 * @param User $user
 * @param string $summary Edit summary
 * @return string ApiStashEdit::ERROR_* constant
 * @since 1.25
 */
public static function parseAndStash( WikiPage $page, Content $content, User $user, $summary ) {
	$log = LoggerFactory::getInstance( 'StashEdit' );

	$title = $page->getTitle();
	$stashKey = self::getStashKey( $title, self::getContentHash( $content ), $user );
	$caller = __METHOD__;

	// Use the master DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = wfGetDB( DB_MASTER );
	$gotLock = $dbw->lock( $stashKey, $caller, 0 );
	if ( !$gotLock ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Held only so that the lock is released when this function returns
	/** @noinspection PhpUnusedLocalVariableInspection */
	$unlocker = new ScopedCallback( function () use ( $dbw, $stashKey, $caller ) {
		$dbw->unlock( $stashKey, $caller );
	} );

	$freshSince = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = self::getStashValue( $stashKey );
	$reusedStash = $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $freshSince;
	if ( !$reusedStash ) {
		$editInfo = $page->prepareContentForEdit(
			$content, null, $user, $content->getDefaultFormat(), false
		);
	}

	if ( !$editInfo || !$editInfo->output ) {
		return self::ERROR_PARSE;
	}

	// Let extensions add ParserOutput metadata or warm other caches
	Hooks::run( 'ParserOutputStashForEdit',
		[ $page, $content, $editInfo->output, $summary, $user ] );

	$logContext = [ 'cachekey' => $stashKey, 'title' => (string)$title ];

	if ( $reusedStash ) {
		$log->debug( "Already cached parser output for key '{cachekey}' ('{title}').", $logContext );
		return self::ERROR_NONE;
	}

	$code = self::storeStashValue(
		$stashKey,
		$editInfo->pstContent,
		$editInfo->output,
		$editInfo->timestamp,
		$user
	);

	if ( $code === true ) {
		$log->debug( "Cached parser output for key '{cachekey}' ('{title}').", $logContext );
		return self::ERROR_NONE;
	}

	$logContext += [ 'code' => $code ];
	if ( $code === 'uncacheable' ) {
		$log->info(
			"Uncacheable parser output for key '{cachekey}' ('{title}') [{code}].",
			$logContext
		);
		return self::ERROR_UNCACHEABLE;
	}

	$log->error( "Failed to cache parser output for key '{cachekey}' ('{title}').", $logContext );
	return self::ERROR_CACHE;
}
246
/**
 * Check that a prepared edit is in cache and still up-to-date
 *
 * This method blocks if the prepared edit is already being rendered,
 * waiting until rendering finishes before doing final validity checks.
 *
 * The cache is rejected if template or file changes are detected.
 * Note that foreign template or file transclusions are not checked.
 *
 * The result is a map (pstContent,output,timestamp) with fields
 * extracted directly from WikiPage::prepareContentForEdit().
 *
 * @param Title $title
 * @param Content $content
 * @param User $user User to get parser options from
 * @return stdClass|bool Returns false on cache miss
 */
public static function checkCache( Title $title, Content $content, User $user ) {
	if ( $user->isBot() ) {
		return false; // bots never stash - don't pollute stats
	}

	$logger = LoggerFactory::getInstance( 'StashEdit' );
	$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();

	$key = self::getStashKey( $title, self::getContentHash( $content ), $user );
	$editInfo = self::getStashValue( $key );
	if ( !is_object( $editInfo ) ) {
		$waitStart = microtime( true );
		// We ignore user aborts and keep parsing. Block on any prior parsing
		// so as to use its results and make use of the time spent parsing.
		// Skip this logic if there is no master connection in case this method
		// is called on an HTTP GET request for some reason.
		$lb = MediaWikiServices::getInstance()->getDBLoadBalancer();
		$dbw = $lb->getAnyOpenConnection( $lb->getWriterIndex() );
		if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
			$editInfo = self::getStashValue( $key );
			$dbw->unlock( $key, __METHOD__ );
		}

		$waitedMs = 1000 * max( 0, microtime( true ) - $waitStart );
		$stats->timing( 'editstash.lock_wait_time', $waitedMs );
	}

	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$stats->increment( 'editstash.cache_misses.no_stash' );
		$logger->debug( "Empty cache for key '$key' ('$title'); user '{$user->getName()}'." );
		return false;
	}

	$output = $editInfo->output;
	$age = time() - wfTimestamp( TS_UNIX, $output->getCacheTime() );

	// Work out why (if at all) the stashed output may be presumed fresh
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$hitKind = 'Timestamp-based';
	} elseif ( isset( $editInfo->edits ) && $editInfo->edits === $user->getEditCount() ) {
		// Logged-in user made no local upload/template edits in the meantime
		$hitKind = 'Edit count based';
	} elseif ( $user->isAnon() && self::lastEditTime( $user ) < $output->getCacheTime() ) {
		// Logged-out user made no local upload/template edits in the meantime
		$hitKind = 'Edit check based';
	} else {
		// User may have changed included content
		$hitKind = null;
	}

	if ( $hitKind === null ) {
		$stats->increment( 'editstash.cache_misses.proven_stale' );
		$logger->info( "Stale cache for key '$key'; old key with outside edits. (age: $age sec)" );
		return false;
	}

	$stats->increment( 'editstash.cache_hits.presumed_fresh' );
	$logger->debug( "$hitKind cache hit for key '$key' (age: $age sec)." );

	if ( $output->getFlag( 'vary-revision' ) ) {
		// This can be used for the initial parse, e.g. for filters or doEditContent(),
		// but a second parse will be triggered in doEditUpdates(). This is not optimal.
		$logger->info( "Cache for key '$key' ('$title') has vary_revision." );
	} elseif ( $output->getFlag( 'vary-revision-id' ) ) {
		// Similar to the above if we didn't guess the ID correctly.
		$logger->info( "Cache for key '$key' ('$title') has vary_revision_id." );
	}

	return $editInfo;
}
331
/**
 * Look up the newest recentchanges timestamp attributed to a user.
 *
 * The query runs against a replica DB connection.
 *
 * @param User $user
 * @return string|null TS_MW timestamp or null
 */
private static function lastEditTime( User $user ) {
	$dbr = wfGetDB( DB_REPLICA );
	$actorWhere = ActorMigration::newMigration()->getWhere( $dbr, 'rc_user', $user, false );

	$tables = [ 'recentchanges' ] + $actorWhere['tables'];
	$conds = [ $actorWhere['conds'] ];
	$latest = $dbr->selectField(
		$tables,
		'MAX(rc_timestamp)',
		$conds,
		__METHOD__,
		[],
		$actorWhere['joins']
	);

	return wfTimestampOrNull( TS_MW, $latest );
}
350
/**
 * Get hash of the content, factoring in model/format
 *
 * The hash is the SHA-1 of the model, the default format and the content
 * serialized in that default format, joined by newlines.
 *
 * @param Content $content
 * @return string
 */
private static function getContentHash( Content $content ) {
	$format = $content->getDefaultFormat();
	$parts = [
		$content->getModel(),
		$format,
		$content->serialize( $format ),
	];

	return sha1( implode( "\n", $parts ) );
}
364
/**
 * Get the temporary prepared edit stash key for a user
 *
 * This key can be used for caching prepared edits provided:
 *   - a) The $user was used for PST options
 *   - b) The parser output was made from the PST using canonical matching options
 *
 * @param Title $title
 * @param string $contentHash Result of getContentHash()
 * @param User $user User to get parser options from
 * @return string
 */
private static function getStashKey( Title $title, $contentHash, User $user ) {
	$cache = ObjectCache::getLocalClusterInstance();

	$titleHash = md5( $title->getPrefixedDBkey() );
	// Account for user name related variables like signatures
	$userHash = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $cache->makeKey( 'stashed-edit-info', $titleHash, $contentHash, $userHash );
}
387
/**
 * Get the cache key under which a stashed ParserOutput is stored.
 *
 * @param string $uuid Output identifier (the 'outputID' field of the stash info)
 * @return string
 */
private static function getStashParserOutputKey( $uuid ) {
	$cache = ObjectCache::getLocalClusterInstance();

	return $cache->makeKey( 'stashed-edit-output', $uuid );
}
395
/**
 * Load a stashed edit, re-attaching the separately stored ParserOutput.
 *
 * The stash info and the parser output live under two cache keys; both must be
 * present for the stash to count as a hit.
 *
 * @param string $key Stash key, as returned by getStashKey()
 * @return stdClass|bool Object map (pstContent,output,outputID,timestamp,edits) or false
 */
private static function getStashValue( $key ) {
	$cache = ObjectCache::getLocalClusterInstance();

	$stashInfo = $cache->get( $key );
	// Treat entries without an output ID (e.g. written in an older layout) as a miss
	// rather than reading an undefined property and looking up a bogus key.
	if ( !is_object( $stashInfo ) || !isset( $stashInfo->outputID ) ) {
		return false;
	}

	$parserOutputKey = self::getStashParserOutputKey( $stashInfo->outputID );
	$parserOutput = $cache->get( $parserOutputKey );
	if ( !( $parserOutput instanceof ParserOutput ) ) {
		return false;
	}

	$stashInfo->output = $parserOutput;

	return $stashInfo;
}
418
/**
 * Build a value to store in memcached based on the PST content and parser output
 *
 * This makes a simple version of WikiPage::prepareContentForEdit() as stash info.
 * The parser output itself is stored under a second key derived from $key.
 *
 * @param string $key
 * @param Content $pstContent Pre-Save transformed content
 * @param ParserOutput $parserOutput
 * @param string $timestamp TS_MW
 * @param User $user
 * @return string|bool True or an error code ('uncacheable' or 'store_error')
 */
private static function storeStashValue(
	$key, Content $pstContent, ParserOutput $parserOutput, $timestamp, User $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL for sanity to avoid extreme template/file staleness.
	$renderAge = time() - wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$ttl = min( $parserOutput->getCacheExpiry() - $renderAge, self::MAX_CACHE_TTL );
	if ( $parserOutput->getFlag( 'user-signature' ) ) {
		// Avoid extremely stale user signature timestamps (T84843)
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// Low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store what is actually needed and split the output into another key (T204742)
	$outputID = md5( $key );
	$stashInfo = new stdClass();
	$stashInfo->pstContent = $pstContent;
	$stashInfo->outputID = $outputID;
	$stashInfo->timestamp = $timestamp;
	$stashInfo->edits = $user->getEditCount();

	$cache = ObjectCache::getLocalClusterInstance();
	if ( !$cache->set( $key, $stashInfo, $ttl ) ) {
		return 'store_error';
	}
	if ( !$cache->set( self::getStashParserOutputKey( $outputID ), $parserOutput, $ttl ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	self::pruneExcessStashedEntries( $cache, $user, $key );

	return true;
}
473
/**
 * Keep only the most recent stashed edits of a user in cache.
 *
 * A per-user list of recently stored stash keys is kept in cache. When the list
 * already holds MAX_CACHE_RECENT other keys, the oldest one is evicted: both its
 * stash info and the ParserOutput stored under the separate output key are deleted.
 *
 * @param BagOStuff $cache
 * @param User $user
 * @param string $newKey Stash key that was just stored
 */
private static function pruneExcessStashedEntries( BagOStuff $cache, User $user, $newKey ) {
	$key = $cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$keyList = $cache->get( $key );
	if ( !is_array( $keyList ) ) {
		$keyList = [];
	}

	// If the same key is stashed again, move it to the end of the list instead of
	// listing it twice; otherwise the entry that was just stored could be evicted below.
	$keyList = array_values( array_diff( $keyList, [ $newKey ] ) );

	if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) {
		$oldestKey = array_shift( $keyList );
		// The ParserOutput is stored under its own key; look up its ID so that
		// this (large) blob is freed as well, not only the stash info.
		$oldestInfo = $cache->get( $oldestKey );
		if ( is_object( $oldestInfo ) && isset( $oldestInfo->outputID ) ) {
			$cache->delete( self::getStashParserOutputKey( $oldestInfo->outputID ) );
		}
		$cache->delete( $oldestKey );
	}

	$keyList[] = $newKey;
	$cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL );
}
491
/**
 * Describe the parameters accepted by this module.
 *
 * 'title', 'contentmodel', 'contentformat' and 'baserevid' are required.
 * 'text' and 'stashedtexthash' both default to null.
 *
 * @return array Map of parameter name to parameter settings
 */
public function getAllowedParams() {
	$params = [];

	$params['title'] = [
		ApiBase::PARAM_TYPE => 'string',
		ApiBase::PARAM_REQUIRED => true,
	];
	$params['section'] = [
		ApiBase::PARAM_TYPE => 'string',
	];
	$params['sectiontitle'] = [
		ApiBase::PARAM_TYPE => 'string',
	];
	$params['text'] = [
		ApiBase::PARAM_TYPE => 'text',
		ApiBase::PARAM_DFLT => null,
	];
	$params['stashedtexthash'] = [
		ApiBase::PARAM_TYPE => 'string',
		ApiBase::PARAM_DFLT => null,
	];
	$params['summary'] = [
		ApiBase::PARAM_TYPE => 'string',
	];
	$params['contentmodel'] = [
		ApiBase::PARAM_TYPE => ContentHandler::getContentModels(),
		ApiBase::PARAM_REQUIRED => true,
	];
	$params['contentformat'] = [
		ApiBase::PARAM_TYPE => ContentHandler::getAllContentFormats(),
		ApiBase::PARAM_REQUIRED => true,
	];
	$params['baserevid'] = [
		ApiBase::PARAM_TYPE => 'integer',
		ApiBase::PARAM_REQUIRED => true,
	];

	return $params;
}
529
/**
 * Name the token type that requests to this module must carry.
 *
 * @return string Always 'csrf'
 */
public function needsToken() {
	return 'csrf';
}
533
/**
 * Report that this module has to be requested via POST.
 *
 * @return bool Always true
 */
public function mustBePosted() {
	return true;
}
537
/**
 * Report that this module operates in write mode.
 *
 * @return bool Always true
 */
public function isWriteMode() {
	return true;
}
541
/**
 * Report that this module is flagged as internal.
 *
 * @return bool Always true
 */
public function isInternal() {
	return true;
}
545 }