Merge "Add part to update ctd_user_defined in populateChangeTagDef"
[lhc/web/wiklou.git] / includes / content / ContentHandler.php
1 <?php
2 /**
3 * Base class for content handling.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @since 1.21
21 *
22 * @file
23 * @ingroup Content
24 *
25 * @author Daniel Kinzler
26 */
27
28 use Wikimedia\Assert\Assert;
29 use MediaWiki\Logger\LoggerFactory;
30 use MediaWiki\MediaWikiServices;
31 use MediaWiki\Search\ParserOutputSearchDataExtractor;
32
33 /**
34 * A content handler knows how to deal with a specific type of content on a wiki
35 * page. Content is stored in the database in a serialized form (using a
36 * serialization format a.k.a. MIME type) and is unserialized into its native
37 * PHP representation (the content model), which is wrapped in an instance of
38 * the appropriate subclass of Content.
39 *
40 * ContentHandler instances are stateless singletons that serve, among other
41 * things, as a factory for Content objects. Generally, there is one subclass
42 * of ContentHandler and one subclass of Content for every type of content model.
43 *
44 * Some content types have a flat model, that is, their native representation
45 * is the same as their serialized form. Examples would be JavaScript and CSS
46 * code. As of now, this also applies to wikitext (MediaWiki's default content
47 * type), but wikitext content may be represented by a DOM or AST structure in
48 * the future.
49 *
50 * @ingroup Content
51 */
52 abstract class ContentHandler {
/**
 * Convenience function for obtaining flat text from a Content object. This
 * is only meant for backwards compatibility with code that is not yet able
 * to handle Content objects!
 *
 * - If $content is null, the empty string is returned.
 * - If $content is a TextContent, $content->getNativeData() is returned.
 * - For any other Content object, the outcome depends on the global
 *   $wgContentHandlerTextFallback:
 *   - 'fail': an MWException is thrown.
 *   - 'serialize': $content->serialize() is returned.
 *   - any other value (such as 'ignore'): null is returned.
 *
 * @since 1.21
 *
 * @param Content|null $content
 *
 * @throws MWException If the content is not an instance of TextContent and
 *  $wgContentHandlerTextFallback is set to 'fail'.
 * @return string|null Textual form of the content, if available.
 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	// Nothing to convert.
	if ( $content === null ) {
		return '';
	}

	// Text-based content already has a flat representation.
	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

	// Non-text content: the configured fallback decides what happens.
	// (switch compares loosely, just like the == operator.)
	switch ( $wgContentHandlerTextFallback ) {
		case 'fail':
			throw new MWException(
				"Attempt to get text from Content with model " .
				$content->getModel()
			);
		case 'serialize':
			return $content->serialize();
		default:
			return null;
	}
}
108
/**
 * Convenience function that builds a Content object from a textual
 * representation.
 *
 * $text is unserialized by the handler of the model named by $modelId; when
 * $modelId is not given, $title->getContentModel() is used instead.
 *
 * @since 1.21
 *
 * @param string $text The textual representation to unserialize.
 * @param Title|null $title The title of the page this text belongs to.
 *  Required if $modelId is not provided.
 * @param string|null $modelId The model to deserialize to. If not provided,
 *  $title->getContentModel() is used.
 * @param string|null $format The format to use for deserialization; passed
 *  through to the handler's unserializeContent().
 *
 * @throws MWException If neither a model ID nor a Title is given. Handler
 *  lookup and unserialization may raise further exceptions.
 * @return Content A Content object representing the text.
 */
public static function makeContent( $text, Title $title = null,
	$modelId = null, $format = null
) {
	$needsModelFromTitle = ( $modelId === null );

	if ( $needsModelFromTitle && $title === null ) {
		throw new MWException( "Must provide a Title object or a content model ID." );
	}

	if ( $needsModelFromTitle ) {
		$modelId = $title->getContentModel();
	}

	return self::getForModelID( $modelId )->unserializeContent( $text, $format );
}
146
/**
 * Returns the name of the default content model to be used for the page
 * with the given title.
 *
 * Note: There should rarely be need to call this method directly.
 * To determine the actual content model for a given page, use
 * Title::getContentModel().
 *
 * The default model is determined as follows:
 * - The namespace's model is looked up with
 *   MWNamespace::getNamespaceContentModel().
 * - The ContentHandlerDefaultModelFor hook may change that model. If the
 *   hook run returns false and the model is not null, it is returned as is.
 * - Pages in NS_MEDIAWIKI whose title ends in .css, .js or .json, and
 *   subpages in NS_USER whose title ends in .js, .css or .json, get the
 *   CSS, JavaScript or JSON model matching that extension. The extension
 *   takes precedence over the model found in the previous steps.
 * - Otherwise a model other than wikitext found in the previous steps is used.
 * - If none of the above applies, the wikitext model is used.
 *
 * Note: this is used by, and may thus not use, Title::getContentModel()
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return string Default model name for the page given by $title
 */
public static function getDefaultModelFor( Title $title ) {
	// NOTE: this method must not rely on $title->getContentModel() directly or indirectly,
	//       because it is used to initialize the mContentModel member.

	$ns = $title->getNamespace();
	$model = MWNamespace::getNamespaceContentModel( $ns );

	// Hook can determine default model
	if ( !Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] ) && $model !== null ) {
		return $model;
	}

	$matches = null;

	// Could this page contain code based on the title?
	$isCodePage = $ns == NS_MEDIAWIKI
		&& preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches );

	// Is this a user subpage containing code?
	$isCodeSubpage = !$isCodePage
		&& $ns == NS_USER
		&& preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches );

	// Only one of the two patterns can have matched; remember its extension.
	$ext = ( $isCodePage || $isCodeSubpage ) ? $matches[1] : false;

	// Is this wikitext, according to $wgNamespaceContentModels or the DefaultModelFor hook?
	$modelAllowsWikitext = $model === null || $model == CONTENT_MODEL_WIKITEXT;
	if ( $modelAllowsWikitext && !$isCodePage && !$isCodeSubpage ) {
		// We established that it must be wikitext
		return CONTENT_MODEL_WIKITEXT;
	}

	switch ( $ext ) {
		case 'js':
			return CONTENT_MODEL_JAVASCRIPT;
		case 'css':
			return CONTENT_MODEL_CSS;
		case 'json':
			return CONTENT_MODEL_JSON;
		default:
			return $model === null ? CONTENT_MODEL_TEXT : $model;
	}
}
233
/**
 * Returns the ContentHandler singleton matching the content model of the
 * given title, as reported by $title->getContentModel().
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return ContentHandler
 */
public static function getForTitle( Title $title ) {
	return self::getForModelID( $title->getContentModel() );
}
248
/**
 * Returns the ContentHandler singleton matching the model of the given
 * Content object, as reported by $content->getModel().
 *
 * @since 1.21
 *
 * @param Content $content
 *
 * @return ContentHandler
 */
public static function getForContent( Content $content ) {
	return self::getForModelID( $content->getModel() );
}
264
/**
 * @var ContentHandler[]|null Cache of ContentHandler instances, keyed by
 *  model ID. Declared without an initial value, so it is null until the
 *  first handler is cached or cleanupHandlersCache() is called.
 */
protected static $handlers;

/**
 * Returns the ContentHandler singleton for the given model ID. Use the
 * CONTENT_MODEL_XXX constants to identify the desired content model.
 *
 * Handlers are created from the global $wgContentHandlers array, whose keys
 * are model IDs. A value that is callable is invoked with the model ID and
 * must return the handler; any other value is treated as a class name and
 * instantiated with the model ID as its only constructor argument.
 *
 * The resulting instance is cached in self::$handlers, so later calls for
 * the same model ID return the same object. $wgContentHandlers itself is
 * not modified.
 *
 * If $wgContentHandlers has no (non-empty) entry for $modelId, the handler
 * may be provided by the ContentHandlerForModelID hook.
 *
 * @since 1.21
 *
 * @param string $modelId The ID of the content model for which to get a
 *  handler. Use CONTENT_MODEL_XXX constants.
 *
 * @throws MWException If the hook or the $wgContentHandlers entry yields
 *  something that is not a ContentHandler.
 * @throws MWUnknownContentModelException If no handler is known for the model ID.
 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
 */
public static function getForModelID( $modelId ) {
	global $wgContentHandlers;

	if ( isset( self::$handlers[$modelId] ) ) {
		return self::$handlers[$modelId];
	}

	if ( empty( $wgContentHandlers[$modelId] ) ) {
		// Not configured: give extensions a chance to supply a handler.
		$handler = null;

		Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

		if ( $handler === null ) {
			throw new MWUnknownContentModelException( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
		}
	} else {
		$classOrCallback = $wgContentHandlers[$modelId];

		if ( is_callable( $classOrCallback ) ) {
			$handler = call_user_func( $classOrCallback, $modelId );
		} else {
			$handler = new $classOrCallback( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			// The entry may be a Closure or an array callable, neither of which
			// can be safely interpolated into a string. Describe it instead so
			// that the intended MWException is what actually gets raised.
			if ( is_string( $classOrCallback ) ) {
				$description = $classOrCallback;
			} elseif ( is_object( $classOrCallback ) ) {
				$description = get_class( $classOrCallback );
			} else {
				$description = gettype( $classOrCallback );
			}

			throw new MWException( "$description from \$wgContentHandlers is not " .
				"compatible with ContentHandler" );
		}
	}

	wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
		. ': ' . get_class( $handler ) );

	self::$handlers[$modelId] = $handler;

	return self::$handlers[$modelId];
}
337
/**
 * Empties the cache of ContentHandler instances, so that subsequent
 * getForModelID() calls create fresh handlers.
 */
public static function cleanupHandlersCache() {
	self::$handlers = [];
}
344
/**
 * Returns the localized name for a given content model.
 *
 * Model names are localized using system messages with keys of the form
 * content-model-$name. If no such message exists, $name itself is returned.
 *
 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
 *  constant or returned by Revision::getContentModel().
 * @param Language|null $lang The language to parse the message in (since 1.26)
 *
 * @return string The content model's localized name, or $name as a fallback.
 */
public static function getLocalizedName( $name, Language $lang = null ) {
	// Messages: content-model-wikitext, content-model-text,
	// content-model-javascript, content-model-css
	$msg = wfMessage( "content-model-$name" );

	if ( $lang ) {
		$msg->inLanguage( $lang );
	}

	if ( $msg->exists() ) {
		return $msg->plain();
	}

	return $name;
}
370
/**
 * Returns the IDs of the known content models: the keys of the global
 * $wgContentHandlers array, after the GetContentModels hook has had a
 * chance to modify the list.
 *
 * @return string[] Content model IDs
 */
public static function getContentModels() {
	global $wgContentHandlers;

	$modelIds = array_keys( $wgContentHandlers );

	Hooks::run( 'GetContentModels', [ &$modelIds ] );

	return $modelIds;
}
378
/**
 * Returns the serialization formats supported by the handlers of all
 * models listed in the global $wgContentHandlers array, with duplicates
 * removed.
 *
 * @return string[] Serialization formats
 */
public static function getAllContentFormats() {
	global $wgContentHandlers;

	$allFormats = [];

	foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
		$allFormats = array_merge(
			$allFormats,
			self::getForModelID( $modelId )->getSupportedFormats()
		);
	}

	return array_unique( $allFormats );
}
393
394 // ------------------------------------------------------------------------
395
/**
 * @var string ID of the content model handled by this instance
 */
protected $mModelID;

/**
 * @var string[] Serialization formats supported by this instance
 */
protected $mSupportedFormats;

/**
 * Constructor; stores the model ID and the list of supported formats of
 * this ContentHandler instance. Values for the parameters are typically
 * provided as literals by subclass's constructors.
 *
 * @param string $modelId (use CONTENT_MODEL_XXX constants).
 * @param string[] $formats List for supported serialization formats
 *  (typically as MIME types)
 */
public function __construct( $modelId, $formats ) {
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
419
/**
 * Turns a Content object of the type supported by this ContentHandler into
 * its serialized (string) form. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @param Content $content The Content object to serialize
 * @param string|null $format The desired serialization format
 *
 * @return string Serialized form of the content
 */
abstract public function serializeContent( Content $content, $format = null );
431
/**
 * Hook point for transforming a blob on export. This default
 * implementation returns the blob unchanged; subclasses may override it to
 * perform transformations such as conversion of legacy formats or
 * filtering of internal meta-data.
 *
 * @param string $blob The blob to be exported
 * @param string|null $format The blob's serialization format (unused here)
 *
 * @return string The blob to export
 */
public function exportTransform( $blob, $format = null ) {
	// No transformation by default.
	return $blob;
}
445
/**
 * Builds a Content object of the type supported by this ContentHandler
 * from its serialized form. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @param string $blob Serialized form of the content
 * @param string|null $format The format used for serialization
 *
 * @return Content The Content object created by deserializing $blob
 */
abstract public function unserializeContent( $blob, $format = null );
457
/**
 * Hook point for transforming a data blob on import. This default
 * implementation returns $blob unchanged; subclasses may override it.
 *
 * @since 1.24
 *
 * @param string $blob
 * @param string|null $format The blob's serialization format (unused here)
 *
 * @return string The blob to import
 */
public function importTransform( $blob, $format = null ) {
	// No transformation by default.
	return $blob;
}
472
/**
 * Creates an empty Content object of the type supported by this
 * ContentHandler. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @return Content
 */
abstract public function makeEmptyContent();
482
/**
 * Creates a new Content object that acts as a redirect to the given page,
 * or returns null if redirects are not supported by this content model.
 *
 * This default implementation always returns null. Subclasses supporting
 * redirects must override this method, and should also override
 * supportsRedirects() to return true.
 *
 * @since 1.21
 *
 * @param Title $destination The page to redirect to.
 * @param string $text Text to include in the redirect, if possible.
 *
 * @return Content|null Always null in this default implementation.
 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	// Redirects are not supported by default.
	return null;
}
503
/**
 * Returns the ID of the content model this ContentHandler can handle, as
 * passed to the constructor. Use with the CONTENT_MODEL_XXX constants.
 *
 * @since 1.21
 *
 * @return string The model ID
 */
public function getModelID() {
	return $this->mModelID;
}
515
/**
 * Asserts that the given model ID is identical to the ID of the content
 * model supported by this ContentHandler.
 *
 * @since 1.21
 *
 * @param string $model_id The model to check
 *
 * @throws MWException If the model ID is not the ID of the content model supported by this
 *  ContentHandler.
 */
protected function checkModelID( $model_id ) {
	if ( $model_id === $this->mModelID ) {
		return;
	}

	throw new MWException(
		"Bad content model: expected {$this->mModelID} but got $model_id."
	);
}
531
/**
 * Returns the list of serialization formats supported by the
 * serializeContent() and unserializeContent() methods of this
 * ContentHandler, as passed to the constructor.
 *
 * @since 1.21
 *
 * @return string[] List of serialization formats as MIME type like strings
 */
public function getSupportedFormats() {
	return $this->mSupportedFormats;
}
544
/**
 * Returns the format used for serialization/deserialization by default by
 * this ContentHandler.
 *
 * This default implementation returns the element at index 0 of the array
 * of formats that was passed to the constructor.
 *
 * @since 1.21
 *
 * @return string The name of the default serialization format as a MIME type
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
559
/**
 * Returns true if $format is a serialization format supported by this
 * ContentHandler, and false otherwise.
 *
 * Note that if $format is null (or otherwise empty), this method always
 * returns true, because that means "use the default format".
 *
 * Formats are compared strictly, so only a value identical to one of the
 * supported format strings is accepted.
 *
 * @since 1.21
 *
 * @param string|null $format The serialization format to check
 *
 * @return bool
 */
public function isSupportedFormat( $format ) {
	if ( !$format ) {
		return true; // this means "use the default"
	}

	// Strict comparison: with loose comparison a non-string value such as
	// true would compare equal to any non-empty format string.
	return in_array( $format, $this->mSupportedFormats, true );
}
580
/**
 * Convenience check that a format provided as a parameter is actually
 * supported, as determined by isSupportedFormat().
 *
 * @param string $format The serialization format to check
 *
 * @throws MWException If the format is not supported by this content handler.
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException(
		"Format $format is not supported for content model "
		. $this->getModelID()
	);
}
596
/**
 * Returns overrides for action handlers.
 * Classes listed here will be used instead of the default one when
 * (and only when) $wgActions[$action] === true. This allows subclasses
 * to override the default action handlers.
 *
 * @since 1.21
 *
 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
 *  either the full qualified class name of an Action class, a callable taking ( Page $page,
 *  IContextSource $context = null ) as parameters and returning an Action object, or an actual
 *  Action object. This default implementation returns an empty array.
 *
 * @see Action::factory
 */
public function getActionOverrides() {
	// No overrides by default.
	return [];
}
615
/**
 * Factory for creating an appropriate DifferenceEngine for this content model.
 * Since 1.32, this is only used for page-level diffs; to diff two content objects,
 * use getSlotDiffRenderer.
 *
 * The DifferenceEngine subclass to use is selected in getDiffEngineClass(). The
 * GetDifferenceEngine hook will receive the DifferenceEngine object and can replace or
 * wrap it.
 * (Note that in older versions of MediaWiki the hook documentation instructed extensions
 * to return false from the hook; you should not rely on always being able to decorate
 * the DifferenceEngine instance from the hook. If the owner of the content type wants to
 * decorate the instance, overriding this method is a safer approach.)
 *
 * @todo This is page-level functionality so it should not belong to ContentHandler.
 *   Move it to a better place once one exists (e.g. PageTypeHandler).
 *
 * @since 1.21
 *
 * @param IContextSource $context Context to use, anything else will be ignored.
 * @param int $old Revision ID we want to show and diff with.
 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
 *
 * @return DifferenceEngine
 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
	$rcid = 0, // FIXME: Deprecated, no longer used
	$refreshCache = false, $unhide = false
) {
	$engineClass = $this->getDiffEngineClass();
	$engine = new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );

	// Let extensions replace or wrap the engine.
	Hooks::run(
		'GetDifferenceEngine',
		[ $context, $old, $new, $refreshCache, $unhide, &$engine ]
	);

	return $engine;
}
653
/**
 * Get an appropriate SlotDiffRenderer for this content model.
 *
 * Starts from the renderer returned by getSlotDiffRendererInternal(). If
 * that is exactly a TextSlotDiffRenderer while createDifferenceEngine()
 * yields something other than a plain DifferenceEngine, the engine is
 * wrapped in a DifferenceEngineSlotDiffRenderer instead. Finally the
 * GetSlotDiffRenderer hook may replace the renderer.
 *
 * @since 1.32
 * @param IContextSource $context
 * @return SlotDiffRenderer
 */
final public function getSlotDiffRenderer( IContextSource $context ) {
	$renderer = $this->getSlotDiffRendererInternal( $context );
	$isStockRenderer = get_class( $renderer ) === TextSlotDiffRenderer::class;

	if ( $isStockRenderer ) {
		// To keep B/C, when SlotDiffRenderer is not overridden for a given content type
		// but DifferenceEngine is, use that instead.
		$engine = $this->createDifferenceEngine( $context );
		$engineClass = get_class( $engine );

		if ( $engineClass !== DifferenceEngine::class ) {
			// TODO turn this into a deprecation warning in a later release
			$logContext = [
				'modelID' => $this->getModelID(),
				'DifferenceEngine' => $engineClass,
			];
			LoggerFactory::getInstance( 'diff' )->info(
				'Falling back to DifferenceEngineSlotDiffRenderer',
				$logContext
			);
			$renderer = new DifferenceEngineSlotDiffRenderer( $engine );
		}
	}

	Hooks::run( 'GetSlotDiffRenderer', [ $this, &$renderer, $context ] );

	return $renderer;
}
679
/**
 * Return the SlotDiffRenderer appropriate for this content handler.
 *
 * This implementation builds a TextSlotDiffRenderer configured with the
 * content language, the statsd data factory, the
 * WikiDiff2MovedParagraphDetectionCutoff setting and the diff engine
 * reported by DifferenceEngine::getEngine().
 *
 * @param IContextSource $context
 * @return SlotDiffRenderer
 */
protected function getSlotDiffRendererInternal( IContextSource $context ) {
	$services = MediaWikiServices::getInstance();
	$contentLanguage = $services->getContentLanguage();
	$statsdDataFactory = $services->getStatsdDataFactory();

	$renderer = new TextSlotDiffRenderer();
	$renderer->setStatsdDataFactory( $statsdDataFactory );
	// XXX using the page language would be better, but it's unclear how that should be injected
	$renderer->setLanguage( $contentLanguage );

	$cutoff = $context->getConfig()->get( 'WikiDiff2MovedParagraphDetectionCutoff' );
	$renderer->setWikiDiff2MovedParagraphDetectionCutoff( $cutoff );

	// false selects the PHP engine, 'wikidiff2' the wikidiff2 engine; any
	// other value is handed to the renderer as the external engine.
	$engine = DifferenceEngine::getEngine();
	if ( $engine === 'wikidiff2' ) {
		$renderer->setEngine( TextSlotDiffRenderer::ENGINE_WIKIDIFF2 );
	} elseif ( $engine === false ) {
		$renderer->setEngine( TextSlotDiffRenderer::ENGINE_PHP );
	} else {
		$renderer->setEngine( TextSlotDiffRenderer::ENGINE_EXTERNAL, $engine );
	}

	return $renderer;
}
707
/**
 * Get the language in which the content of the given page is written.
 *
 * This default implementation starts from the content language; for pages
 * in the MediaWiki namespace it uses the language that MessageCache
 * derives from the title instead. The PageContentLanguage hook may then
 * change the result.
 *
 * Note that the pages language is not cacheable, since it may in some
 * cases depend on user settings.
 *
 * Also note that the page language may or may not depend on the actual content of the page,
 * that is, this method may load the content in order to determine the language.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *  Not used by this default implementation.
 *
 * @return Language The page's language
 */
public function getPageLanguage( Title $title, Content $content = null ) {
	global $wgLang;

	$pageLang = MediaWikiServices::getInstance()->getContentLanguage();

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Parse mediawiki messages with correct target language
		// (the language code is the second element of the result).
		$messageInfo = MessageCache::singleton()->figureMessage( $title->getText() );
		$pageLang = Language::factory( $messageInfo[1] );
	}

	Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

	return wfGetLangObj( $pageLang );
}
741
/**
 * Get the language in which the content of this page is written when
 * viewed by user.
 *
 * This default implementation returns $this->getPageLanguage( $title, $content ),
 * except that outside the MediaWiki namespace a preferred variant differing
 * from the page language's code replaces it with the variant's language.
 *
 * Note that the pages view language is not cacheable, since it depends on user settings.
 *
 * Also note that the page language may or may not depend on the actual content of the page,
 * that is, this method may load the content in order to determine the language.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *
 * @return Language The page's language for viewing
 */
public function getPageViewLanguage( Title $title, Content $content = null ) {
	$pageLang = $this->getPageLanguage( $title, $content );

	// Variants are not applied to pages in the MediaWiki namespace.
	if ( $title->getNamespace() === NS_MEDIAWIKI ) {
		return $pageLang;
	}

	// If the user chooses a variant, the content is actually
	// in a language whose code is the variant code.
	$variant = $pageLang->getPreferredVariant();
	if ( $pageLang->getCode() === $variant ) {
		return $pageLang;
	}

	return Language::factory( $variant );
}
776
/**
 * Determines whether the content type handled by this ContentHandler
 * can be used on the given page.
 *
 * This default implementation answers true unless the
 * ContentModelCanBeUsedOn hook changes the result.
 * Subclasses may override this to restrict the use of this content model to specific locations,
 * typically based on the namespace or some other aspect of the title, such as a special suffix
 * (e.g. ".svg" for SVG content).
 *
 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
 * content model can be used where.
 *
 * @param Title $title The page's title.
 *
 * @return bool True if content of this kind can be used on the given page, false otherwise.
 */
public function canBeUsedOn( Title $title ) {
	$allowed = true;

	Hooks::run( 'ContentModelCanBeUsedOn', [ $this->getModelID(), $title, &$allowed ] );

	return $allowed;
}
800
/**
 * Returns the name of the diff engine class to use; DifferenceEngine in
 * this default implementation.
 *
 * @since 1.21
 *
 * @return string Class name
 */
protected function getDiffEngineClass() {
	return DifferenceEngine::class;
}
811
/**
 * Attempts to merge differences between three versions. Returns a new
 * Content object for a clean merge and false for failure or a conflict.
 *
 * This default implementation does not attempt a merge and always
 * returns false.
 *
 * @since 1.21
 *
 * @param Content $oldContent The page's previous content.
 * @param Content $myContent One of the page's conflicting contents.
 * @param Content $yourContent One of the page's conflicting contents.
 *
 * @return Content|bool Always false in this default implementation.
 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	// Merging is not supported by default.
	return false;
}
829
/**
 * Return type of change if one exists for the given edit.
 *
 * The checks run in this order, and the first match wins: blanking,
 * redirect created / retargeted / removed, page creation, large removal,
 * content model change.
 *
 * @since 1.31
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string|null String key representing type of change, or null.
 */
private function getChangeType(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$oldTarget = null;
	if ( $oldContent !== null ) {
		$oldTarget = $oldContent->getRedirectTarget();
	}

	$newTarget = null;
	if ( $newContent !== null ) {
		$newTarget = $newContent->getRedirectTarget();
	}

	// Blanking of a page
	if ( $oldContent && $oldContent->getSize() > 0 &&
		$newContent && $newContent->getSize() === 0
	) {
		return 'blank';
	}

	// Changing an existing redirect into a non-redirect
	if ( !$newTarget && $oldTarget ) {
		return 'removed-redirect';
	}

	// New redirect page (by creating new page or by changing content page)
	if ( $newTarget && !$oldTarget ) {
		return 'new-redirect';
	}

	// Redirect target changed (a differing fragment counts as a change, too)
	if ( $newTarget && (
		!$newTarget->equals( $oldTarget ) ||
		$oldTarget->getFragment() !== $newTarget->getFragment()
	) ) {
		return 'changed-redirect-target';
	}

	// New page created, either blank or with content
	if ( ( $flags & EDIT_NEW ) && $newContent ) {
		return $newContent->getSize() === 0 ? 'newblank' : 'newpage';
	}

	if ( $oldContent && $newContent ) {
		// Removing more than 90% of the page
		if ( $oldContent->getSize() > 10 * $newContent->getSize() ) {
			return 'replace';
		}

		// Content model changed
		if ( $oldContent->getModel() !== $newContent->getModel() ) {
			return 'contentmodelchange';
		}
	}

	// None of the known change types applies.
	return null;
}
896
/**
 * Return an applicable auto-summary if one exists for the given edit.
 *
 * The kind of change is classified by getChangeType(); the matching
 * auto-summary message is then rendered in the content language. Where the
 * message embeds an excerpt of the new content, the excerpt budget is a fixed
 * limit (250 for redirect summaries, 200 otherwise) minus the byte length of
 * the message text and of any redirect target titles that are included.
 *
 * @since 1.21
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string An appropriate auto-summary, or an empty string.
 */
public function getAutosummary(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$changeType = $this->getChangeType( $oldContent, $newContent, $flags );

	// There's no applicable auto-summary for our case, so our auto-summary is empty.
	if ( !$changeType ) {
		return '';
	}

	// Decide what kind of auto-summary is needed.
	switch ( $changeType ) {
		case 'new-redirect':
			$newTarget = $newContent->getRedirectTarget();
			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
				- strlen( $newTarget->getFullText() )
			);

			return wfMessage( 'autoredircomment', $newTarget->getFullText() )
				->plaintextParams( $truncatedtext )->inContentLanguage()->text();
		case 'changed-redirect-target':
			$oldTarget = $oldContent->getRedirectTarget();
			$newTarget = $newContent->getRedirectTarget();

			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autosumm-changed-redirect-target' )
					->inContentLanguage()->text() )
				- strlen( $oldTarget->getFullText() )
				- strlen( $newTarget->getFullText() )
			);

			return wfMessage( 'autosumm-changed-redirect-target',
					$oldTarget->getFullText(),
					$newTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		case 'removed-redirect':
			$oldTarget = $oldContent->getRedirectTarget();
			// getChangeType() reports 'removed-redirect' whenever the old content
			// was a redirect and there is no new redirect target, which includes
			// the case of no new content at all. Guard against a null $newContent
			// and fall back to an empty excerpt instead of failing on a null call.
			$truncatedtext = $newContent
				? $newContent->getTextForSummary(
					250
					- strlen( wfMessage( 'autosumm-removed-redirect' )
						->inContentLanguage()->text() )
					- strlen( $oldTarget->getFullText() ) )
				: '';

			return wfMessage( 'autosumm-removed-redirect', $oldTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		case 'newpage':
			// If they're making a new article, give its text, truncated, in the summary.
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		case 'blank':
			return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
		case 'replace':
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		case 'newblank':
			return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
		default:
			// Change types without a dedicated auto-summary message.
			return '';
	}
}
979
/**
 * Return an applicable tag if one exists for the given edit or return null.
 *
 * The change type reported by getChangeType() is prefixed with 'mw-' and
 * returned only if that name is among the tags listed by
 * ChangeTags::getSoftwareTags().
 *
 * @since 1.31
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string|null An appropriate tag, or null.
 */
public function getChangeTag(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$changeType = $this->getChangeType( $oldContent, $newContent, $flags );

	// There's no applicable tag for this change.
	if ( !$changeType ) {
		return null;
	}

	// Core tags use the same keys as ones returned from $this->getChangeType()
	// but prefixed with pseudo namespace 'mw-', so we add the prefix before checking
	// if this type of change should be tagged
	$tag = 'mw-' . $changeType;

	// Not all change types are tagged, so we check against the list of defined tags.
	// Strict comparison: a tag name must match as a string, never via loose
	// type juggling against a non-string list entry.
	if ( in_array( $tag, ChangeTags::getSoftwareTags(), true ) ) {
		return $tag;
	}

	return null;
}
1015
/**
 * Auto-generates a deletion reason
 *
 * The reason is built from one of the 'exbeforeblank', 'excontentauthor' or
 * 'excontent' messages (in the content language), with an excerpt of the
 * page content substituted for the '$1' placeholder.
 *
 * @since 1.21
 *
 * @param Title $title The page's title
 * @param bool &$hasHistory Set to true if the contributor query returned more
 *  than one row. Left untouched when the method returns false early.
 *
 * @return mixed String containing deletion reason or empty string, or
 *    boolean false if no revision was found
 *
 * @todo &$hasHistory is extremely ugly, it's here because
 * WikiPage::getAutoDeleteReason() and Article::generateReason()
 * have it / want it.
 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbr = wfGetDB( DB_REPLICA );

	// Start from the latest revision of the page
	$rev = Revision::newFromTitle( $title );
	if ( $rev === null ) {
		return false;
	}

	$content = $rev->getContent();
	$usedPreviousRevision = false;

	// When the latest revision has no content or is empty, fall back to the
	// revision before it (if there is one) and remember that we did so.
	if ( !$content || $content->isEmpty() ) {
		$previous = $rev->getPrevious();

		if ( $previous ) {
			$rev = $previous;
			$content = $previous->getContent();
			$usedPreviousRevision = true;
		}
	}

	$this->checkModelID( $rev->getContentModel() );

	// Look at up to 20 revision rows whose user is not hidden, to find out
	// whether there was only one contributor.
	$revQuery = Revision::getQueryInfo();
	$res = $dbr->select(
		$revQuery['tables'],
		[ 'rev_user_text' => $revQuery['fields']['rev_user_text'] ],
		[
			'rev_page' => $title->getArticleID(),
			$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
		],
		__METHOD__,
		[ 'LIMIT' => 20 ],
		$revQuery['joins']
	);

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );

	// No first row means the only contributor is hidden
	$onlyAuthor = false;
	$firstRow = $dbr->fetchObject( $res );
	if ( $firstRow ) {
		$onlyAuthor = $firstRow->rev_user_text;
		// Any row with a different user name means there is no single author
		foreach ( $res as $row ) {
			if ( $row->rev_user_text != $onlyAuthor ) { // T24999
				$onlyAuthor = false;
				break;
			}
		}
	}

	// Pick the message; the literal '$1' is kept as a placeholder for the excerpt
	if ( $usedPreviousRevision ) {
		// The excerpt comes from the revision before the blank one
		$message = wfMessage( 'exbeforeblank', '$1' );
	} elseif ( $onlyAuthor ) {
		$message = wfMessage( 'excontentauthor', '$1', $onlyAuthor );
	} else {
		$message = wfMessage( 'excontent', '$1' );
	}
	$reason = $message->inContentLanguage()->text();

	if ( $reason === '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$excerptLimit = 255 - ( strlen( $reason ) - 2 );
	$excerpt = $content ? $content->getTextForSummary( $excerptLimit ) : '';

	// Now replace the '$1' placeholder
	return str_replace( '$1', $excerpt, $reason );
}
1126
/**
 * Get the Content object that needs to be saved in order to undo all revisions
 * between $undo and $undoafter. Revisions must belong to the same page,
 * must exist and must not be deleted.
 *
 * All three of $current, $undo and $undoafter must be of the same kind:
 * either all Content or all Revision. When Revision objects are passed,
 * $undoIsLatest is recomputed by comparing the IDs of $current and $undo,
 * and the passed-in value is ignored.
 *
 * @since 1.21
 * @since 1.32 accepts Content objects for all parameters instead of Revision objects.
 *  Passing Revision objects is deprecated.
 *
 * @param Revision|Content $current The current text
 * @param Revision|Content $undo The content of the revision to undo
 * @param Revision|Content $undoafter Must be from an earlier revision than $undo
 * @param bool $undoIsLatest Set true if $undo is from the current revision (since 1.32)
 *
 * @return mixed Content on success, false on failure
 */
public function getUndoContent( $current, $undo, $undoafter, $undoIsLatest = false ) {
	Assert::parameterType( Revision::class . '|' . Content::class, $current, '$current' );

	if ( $current instanceof Content ) {
		Assert::parameter( $undo instanceof Content, '$undo',
			'Must be Content when $current is Content' );
		Assert::parameter( $undoafter instanceof Content, '$undoafter',
			'Must be Content when $current is Content' );

		$currentContent = $current;
		$undoContent = $undo;
		$baseContent = $undoafter;
	} else {
		Assert::parameter( $undo instanceof Revision, '$undo',
			'Must be Revision when $current is Revision' );
		Assert::parameter( $undoafter instanceof Revision, '$undoafter',
			'Must be Revision when $current is Revision' );

		$currentContent = $current->getContent();
		if ( !$currentContent ) {
			// no page
			return false;
		}

		$undoContent = $undo->getContent();
		$baseContent = $undoafter->getContent();
		if ( !$undoContent || !$baseContent ) {
			// no content to undo
			return false;
		}

		$undoIsLatest = $current->getId() === $undo->getId();
	}

	try {
		$this->checkModelID( $currentContent->getModel() );
		$this->checkModelID( $undoContent->getModel() );
		// Undoing the most recent revision may revert a content model change,
		// so the model of $undoafter is only checked when undoing a revision
		// in the middle, where reverting the model would be confusing.
		if ( !$undoIsLatest ) {
			$this->checkModelID( $baseContent->getModel() );
		}
	} catch ( MWException $e ) {
		// A content model check failed; report failure
		return false;
	}

	// A straight revert needs no merge; otherwise delegate to merge3().
	return $currentContent->equals( $undoContent )
		? $baseContent
		: $this->merge3( $undoContent, $baseContent, $currentContent );
}
1200
/**
 * Get parser options suitable for rendering and caching the article
 *
 * Emits a deprecation notice and forwards to ParserOptions::newCanonical().
 *
 * @deprecated since 1.32, use WikiPage::makeParserOptions() or
 *  ParserOptions::newCanonical() instead.
 * @param IContextSource|User|string $context One of the following:
 *        - IContextSource: Use the User and the Language of the provided
 *                                            context
 *        - User: Use the provided User object and $wgLang for the language,
 *                                            so use an IContextSource object if possible.
 *        - 'canonical': Canonical options (anonymous user with default
 *                                            preferences and content language).
 *
 * @throws MWException
 * @return ParserOptions
 */
public function makeParserOptions( $context ) {
	wfDeprecated( __METHOD__, '1.32' );

	$canonicalOptions = ParserOptions::newCanonical( $context );

	return $canonicalOptions;
}
1221
/**
 * Tells whether this content model supports caching through the
 * ParserCache mechanism. See WikiPage::shouldCheckParserCache().
 *
 * This base implementation reports no support.
 *
 * @since 1.21
 *
 * @return bool Always false.
 */
public function isParserCacheSupported() {
	return false;
}
1233
/**
 * Tells whether this content model supports sections.
 * This base implementation reports no support.
 *
 * Content models that return true here should also implement
 * Content::getSection, Content::replaceSection, etc. to handle sections.
 *
 * @return bool Always false.
 */
public function supportsSections() {
	return false;
}
1246
/**
 * Tells whether this content model supports categories.
 * This base implementation reports support.
 *
 * @return bool Always true.
 */
public function supportsCategories() {
	return true;
}
1256
/**
 * Tells whether this content model supports redirects.
 * This base implementation reports no support.
 *
 * Content models that return true here should also implement
 * ContentHandler::makeRedirectContent to return a Content object.
 *
 * @return bool Always false.
 */
public function supportsRedirects() {
	return false;
}
1269
/**
 * Tells whether this content model supports direct editing, such as via EditPage.
 *
 * @return bool Default is false, and true for TextContent and its derivatives.
 */
public function supportsDirectEditing() {
	return false;
}
1278
/**
 * Tells whether this content model supports direct editing via ApiEditPage.
 *
 * This base implementation simply mirrors supportsDirectEditing().
 *
 * @return bool Default is false, and true for TextContent and derivatives.
 */
public function supportsDirectApiEditing() {
	$directlyEditable = $this->supportsDirectEditing();

	return $directlyEditable;
}
1287
/**
 * Get fields definition for search index
 *
 * Defines the 'category', 'external_link', 'outgoing_link', 'template' and
 * 'content_model' fields. 'category' is a text field, the others are keyword
 * fields; 'category' and 'template' additionally get the case-folding flag.
 *
 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
 *       field mappings here. (see T142670 and T143409)
 *
 * @param SearchEngine $engine
 * @return SearchIndexField[] List of fields this content handler can provide.
 * @since 1.28
 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	$fields = [];

	$categoryField = $engine->makeSearchFieldMapping(
		'category',
		SearchIndexField::INDEX_TYPE_TEXT
	);
	$fields['category'] = $categoryField;
	$categoryField->setFlag( SearchIndexField::FLAG_CASEFOLD );

	$fields['external_link'] = $engine->makeSearchFieldMapping(
		'external_link',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);

	$fields['outgoing_link'] = $engine->makeSearchFieldMapping(
		'outgoing_link',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);

	$templateField = $engine->makeSearchFieldMapping(
		'template',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);
	$fields['template'] = $templateField;
	$templateField->setFlag( SearchIndexField::FLAG_CASEFOLD );

	$fields['content_model'] = $engine->makeSearchFieldMapping(
		'content_model',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);

	return $fields;
}
1328
/**
 * Add new field definition to array.
 *
 * The mapping is created by the given engine and stored under $name in
 * $fields, which is modified in place and also returned.
 *
 * @param SearchIndexField[] &$fields
 * @param SearchEngine $engine
 * @param string $name
 * @param int $type
 * @return SearchIndexField[] new field defs
 * @since 1.28
 */
protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
	$mapping = $engine->makeSearchFieldMapping( $name, $type );
	$fields[$name] = $mapping;

	return $fields;
}
1342
/**
 * Return fields to be indexed by search engine
 * as representation of this document.
 * Overriding class should call parent function or take care of calling
 * the SearchDataForIndex hook.
 *
 * When the page has content, the field data is filled from the parser
 * output (categories, external links, outgoing links, templates) and from
 * the content itself (text, size, model). The SearchDataForIndex hook is
 * run in every case and receives the field data by reference.
 *
 * @param WikiPage $page Page to index
 * @param ParserOutput $output
 * @param SearchEngine $engine Search engine for which we are indexing
 * @return array Map of name=>value for fields
 * @since 1.28
 */
public function getDataForSearchIndex(
	WikiPage $page,
	ParserOutput $output,
	SearchEngine $engine
) {
	$fieldData = [];
	$pageContent = $page->getContent();

	if ( $pageContent ) {
		$extractor = new ParserOutputSearchDataExtractor();

		// Fields derived from the parser output
		$fieldData = [
			'category' => $extractor->getCategories( $output ),
			'external_link' => $extractor->getExternalLinks( $output ),
			'outgoing_link' => $extractor->getOutgoingLinks( $output ),
			'template' => $extractor->getTemplates( $output ),
		];

		// Fields derived from the content object; the same text is used
		// for both 'text' and 'source_text'.
		$indexText = $pageContent->getTextForSearchIndex();
		$fieldData += [
			'text' => $indexText,
			'source_text' => $indexText,
			'text_bytes' => $pageContent->getSize(),
			'content_model' => $pageContent->getModel(),
		];
	}

	Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

	return $fieldData;
}
1381
/**
 * Produce page output suitable for indexing.
 *
 * If a parser cache is given and holds an entry for the page under canonical
 * parser options, that entry is returned. Otherwise the page's revision is
 * rendered through the RevisionRenderer service and, if a cache was given,
 * the result is saved to it.
 *
 * Specific content handlers may override it if they need different content handling.
 *
 * @param WikiPage $page
 * @param ParserCache|null $cache
 * @return ParserOutput
 */
public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
	// TODO: MCR: ContentHandler should be called per slot, not for the whole page.
	// See T190066.
	$parserOptions = $page->makeParserOptions( 'canonical' );

	// Cache hit: nothing to render
	$cachedOutput = $cache ? $cache->get( $page, $parserOptions ) : null;
	if ( $cachedOutput ) {
		return $cachedOutput;
	}

	$freshOutput = MediaWikiServices::getInstance()
		->getRevisionRenderer()
		->getRenderedRevision(
			$page->getRevision()->getRevisionRecord(),
			$parserOptions
		)
		->getRevisionParserOutput();

	if ( $cache ) {
		$cache->save( $freshOutput, $page, $parserOptions );
	}

	return $freshOutput;
}
1412
1413 }