4e50c8ee799b090784c7d2bb6a1d1ed4606e8e00
[lhc/web/wiklou.git] / includes / content / ContentHandler.php
1 <?php
2
3 use MediaWiki\Search\ParserOutputSearchDataExtractor;
4
5 /**
6 * Base class for content handling.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @since 1.21
24 *
25 * @file
26 * @ingroup Content
27 *
28 * @author Daniel Kinzler
29 */
30
/**
 * Thrown when a content object cannot be serialized or unserialized.
 *
 * This class adds nothing to MWException; it exists so that callers can
 * catch serialization problems separately from other failures.
 *
 * @ingroup Content
 */
class MWContentSerializationException extends MWException {
}
38
/**
 * Exception thrown when an unregistered content model is requested. This error
 * can be triggered by user input, so a separate exception class is provided so
 * callers can substitute a context-specific, internationalised error message.
 *
 * The offending model ID is kept on the exception and can be read back with
 * getModelId().
 *
 * @ingroup Content
 * @since 1.27
 */
class MWUnknownContentModelException extends MWException {
	/** @var string The name of the unknown content model */
	private $modelId;

	/**
	 * Builds the (English, non-localised) exception message and remembers
	 * the model ID that could not be resolved.
	 *
	 * @param string $modelId The content model ID that is not registered
	 */
	public function __construct( $modelId ) {
		parent::__construct( "The content model '$modelId' is not registered on this wiki.\n" .
			'See https://www.mediawiki.org/wiki/Content_handlers to find out which extensions ' .
			'handle this content model.' );
		$this->modelId = $modelId;
	}

	/**
	 * @return string The content model ID passed to the constructor
	 */
	public function getModelId() {
		return $this->modelId;
	}
}
64
65 /**
66 * A content handler knows how to deal with a specific type of content on a wiki
67 * page. Content is stored in the database in a serialized form (using a
68 * serialization format a.k.a. MIME type) and is unserialized into its native
69 * PHP representation (the content model), which is wrapped in an instance of
70 * the appropriate subclass of Content.
71 *
72 * ContentHandler instances are stateless singletons that serve, among other
73 * things, as a factory for Content objects. Generally, there is one subclass
74 * of ContentHandler and one subclass of Content for every type of content model.
75 *
76 * Some content types have a flat model, that is, their native representation
77 * is the same as their serialized form. Examples would be JavaScript and CSS
78 * code. As of now, this also applies to wikitext (MediaWiki's default content
79 * type), but wikitext content may be represented by a DOM or AST structure in
80 * the future.
81 *
82 * @ingroup Content
83 */
84 abstract class ContentHandler {
/**
 * Switch for enabling deprecation warnings. Disabled (false) by default.
 *
 * Intended for ContentHandler::deprecated() and ContentHandler::runLegacyHooks()
 * (neither is visible in this part of the file). Once the ContentHandler code
 * has settled in a bit, this should be set to true to make extensions etc.
 * show warnings when using deprecated functions and hooks.
 *
 * @var bool
 */
protected static $enableDeprecationWarnings = false;
93
/**
 * Backwards-compatibility helper that turns a Content object into flat text.
 * Only meant for code that cannot deal with Content objects yet.
 *
 * Behaviour, in order:
 * - A null $content yields the empty string.
 * - A TextContent yields $content->getNativeData().
 * - Anything else is logged to the 'ContentHandler' debug channel and then
 *   handled according to the global $wgContentHandlerTextFallback:
 *   - 'fail': an MWException is thrown.
 *   - 'serialize': $content->serialize() is returned.
 *   - any other value (including 'ignore'): null is returned.
 *
 * @since 1.21
 *
 * @param Content|null $content
 *
 * @throws MWException If the content is not an instance of TextContent and
 *   $wgContentHandlerTextFallback is set to 'fail'.
 * @return string|null Textual form of the content, if available.
 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	if ( $content === null ) {
		return '';
	}

	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	// Non-text content is being accessed as text; leave a trace of that.
	wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

	// switch compares loosely, exactly like the == checks it stands in for.
	switch ( $wgContentHandlerTextFallback ) {
		case 'fail':
			throw new MWException(
				"Attempt to get text from Content with model " .
				$content->getModel()
			);
		case 'serialize':
			return $content->serialize();
		default:
			return null;
	}
}
149
/**
 * Builds a Content object from its textual (serialized) representation.
 *
 * The model is $modelId when given, otherwise $title->getContentModel().
 * The handler for that model then unserializes $text using $format.
 *
 * @since 1.21
 *
 * @param string $text The textual representation, will be
 *   unserialized to create the Content object
 * @param Title|null $title The title of the page this text belongs to.
 *   Required if $modelId is not provided.
 * @param string|null $modelId The model to deserialize to. If not provided,
 *   $title->getContentModel() is used.
 * @param string|null $format The format to use for deserialization; passed
 *   on unchanged to the handler's unserializeContent().
 *
 * @throws MWException If neither a Title nor a model ID was supplied. Errors
 *   raised by getForModelID() or unserializeContent() propagate as well.
 * @return Content A Content object representing the text.
 */
public static function makeContent( $text, Title $title = null,
	$modelId = null, $format = null
) {
	if ( $modelId === null ) {
		if ( $title === null ) {
			throw new MWException( "Must provide a Title object or a content model ID." );
		}

		$modelId = $title->getContentModel();
	}

	return self::getForModelID( $modelId )->unserializeContent( $text, $format );
}
187
/**
 * Returns the name of the default content model to be used for the page
 * with the given title.
 *
 * Note: There should rarely be need to call this method directly.
 * To determine the actual content model for a given page, use
 * Title::getContentModel().
 *
 * The decision is made in this order:
 * - The namespace default is fetched via MWNamespace::getNamespaceContentModel().
 * - The ContentHandlerDefaultModelFor hook may change that model. If a hook
 *   handler aborts the hook and the model is not null, it is returned as is.
 * - Pages in NS_MEDIAWIKI whose name ends in .css, .js or .json count as
 *   code pages.
 * - The compatibility hook TitleIsCssOrJsPage may override the code-page flag.
 *   ContentHandlerDefaultModelFor should be used instead if possible.
 * - Subpages in NS_USER whose name ends in .js, .css or .json count as code
 *   subpages (unless the page is already flagged as a code page).
 * - The page is considered wikitext if the model is null or wikitext and the
 *   page is neither a code page nor a code subpage.
 * - The compatibility hook TitleIsWikitextPage may override that flag.
 *   ContentHandlerDefaultModelFor should be used instead if possible.
 * - Wikitext pages get CONTENT_MODEL_WIKITEXT. Otherwise the extension picks
 *   the JavaScript, CSS or JSON model; without a recognised extension the
 *   namespace/hook model is used, or CONTENT_MODEL_TEXT if that is null.
 *
 * Note: this is used by, and may thus not use, Title::getContentModel()
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return string Default model name for the page given by $title
 */
public static function getDefaultModelFor( Title $title ) {
	// NOTE: this method must not rely on $title->getContentModel() directly or indirectly,
	// because it is used to initialize the mContentModel member.

	$ns = $title->getNamespace();
	$model = MWNamespace::getNamespaceContentModel( $ns );

	// A hook handler that aborts the hook gets the final say, provided it left a model.
	$hookCompleted = Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] );
	if ( !$hookCompleted && $model !== null ) {
		return $model;
	}

	$ext = false;
	$matches = null;

	// Could this page contain code based on the title?
	$isCodePage = $ns == NS_MEDIAWIKI
		&& preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches );
	if ( $isCodePage ) {
		$ext = $matches[1];
	}

	// Hook can force JS/CSS
	Hooks::run( 'TitleIsCssOrJsPage', [ $title, &$isCodePage ], '1.25' );

	// Is this a user subpage containing code?
	$isCodeSubpage = $ns == NS_USER
		&& !$isCodePage
		&& preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches );
	if ( $isCodeSubpage ) {
		$ext = $matches[1];
	}

	// Is this wikitext, according to $wgNamespaceContentModels or the DefaultModelFor hook?
	$isWikitext = ( $model === null || $model == CONTENT_MODEL_WIKITEXT )
		&& !$isCodePage
		&& !$isCodeSubpage;

	// Hook can override $isWikitext
	Hooks::run( 'TitleIsWikitextPage', [ $title, &$isWikitext ], '1.25' );

	if ( $isWikitext ) {
		return CONTENT_MODEL_WIKITEXT;
	}

	// Not wikitext: pick the model by file extension, if one was recognised.
	switch ( $ext ) {
		case 'js':
			return CONTENT_MODEL_JAVASCRIPT;
		case 'css':
			return CONTENT_MODEL_CSS;
		case 'json':
			return CONTENT_MODEL_JSON;
		default:
			return $model === null ? CONTENT_MODEL_TEXT : $model;
	}
}
280
/**
 * Looks up the ContentHandler for the content model reported by
 * $title->getContentModel().
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return ContentHandler
 */
public static function getForTitle( Title $title ) {
	return self::getForModelID( $title->getContentModel() );
}
295
/**
 * Looks up the ContentHandler for the content model reported by
 * $content->getModel().
 *
 * @since 1.21
 *
 * @param Content $content
 *
 * @return ContentHandler
 */
public static function getForContent( Content $content ) {
	return self::getForModelID( $content->getModel() );
}
311
/**
 * @var ContentHandler[] Cache of ContentHandler instances, keyed by model ID
 */
protected static $handlers;

/**
 * Returns the ContentHandler singleton for the given model ID. Use the
 * CONTENT_MODEL_XXX constants to identify the desired content model.
 *
 * Handlers are configured in the global $wgContentHandlers array. Keys in
 * that array are model names; each value is either a callable that is
 * invoked with the model ID and must return a ContentHandler, or the name
 * of a ContentHandler subclass whose constructor is called with the model ID.
 *
 * The resulting instance is cached in self::$handlers, so each model ID is
 * resolved at most once per request; $wgContentHandlers itself is not modified.
 *
 * If $wgContentHandlers has no (non-empty) entry for $modelId, the
 * ContentHandler may be provided by the ContentHandlerForModelID hook.
 *
 * @since 1.21
 *
 * @param string $modelId The ID of the content model for which to get a
 *   handler. Use CONTENT_MODEL_XXX constants.
 *
 * @throws MWException For internal errors and problems in the configuration:
 *   the hook or the configured class/callback did not produce a ContentHandler,
 *   or the configured entry is neither callable nor an existing class name.
 * @throws MWUnknownContentModelException If no handler is known for the model ID.
 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
 */
public static function getForModelID( $modelId ) {
	global $wgContentHandlers;

	if ( isset( self::$handlers[$modelId] ) ) {
		return self::$handlers[$modelId];
	}

	if ( empty( $wgContentHandlers[$modelId] ) ) {
		// Nothing configured: give extensions a chance to supply a handler.
		$handler = null;

		Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

		if ( $handler === null ) {
			throw new MWUnknownContentModelException( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
		}
	} else {
		$classOrCallback = $wgContentHandlers[$modelId];

		// Closures and array callables cannot be interpolated into a string,
		// so build a printable description once for the error messages below.
		$description = is_string( $classOrCallback )
			? $classOrCallback
			: "The callback registered for model '$modelId'";

		if ( is_callable( $classOrCallback ) ) {
			$handler = call_user_func( $classOrCallback, $modelId );
		} elseif ( is_string( $classOrCallback ) && class_exists( $classOrCallback ) ) {
			$handler = new $classOrCallback( $modelId );
		} else {
			// Report a misconfiguration as the documented MWException rather
			// than letting "new" die with a fatal error.
			throw new MWException( "$description from \$wgContentHandlers is neither " .
				"callable nor the name of an existing class" );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "$description from \$wgContentHandlers is not " .
				"compatible with ContentHandler" );
		}
	}

	wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
		. ': ' . get_class( $handler ) );

	self::$handlers[$modelId] = $handler;

	return self::$handlers[$modelId];
}
384
/**
 * Returns the localized name for a given content model.
 *
 * Model names are localized using system messages whose key is
 * "content-model-" followed by $name. If no such message exists, $name
 * itself is returned unchanged.
 *
 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
 *   constant or returned by Revision::getContentModel().
 * @param Language|null $lang The language to parse the message in (since 1.26)
 *
 * @return string The content model's localized name, or $name as a fallback.
 */
public static function getLocalizedName( $name, Language $lang = null ) {
	// Messages: content-model-wikitext, content-model-text,
	// content-model-javascript, content-model-css
	$msg = wfMessage( "content-model-$name" );

	if ( $lang ) {
		$msg->inLanguage( $lang );
	}

	if ( !$msg->exists() ) {
		return $name;
	}

	return $msg->plain();
}
410
/**
 * Lists the content model IDs configured on this wiki, i.e. the keys of
 * the global $wgContentHandlers array.
 *
 * @return string[] Content model IDs
 */
public static function getContentModels() {
	global $wgContentHandlers;

	$models = array_keys( $wgContentHandlers );

	return $models;
}
416
/**
 * Collects the serialization formats supported by the handlers of all
 * content models configured in the global $wgContentHandlers array.
 *
 * Every configured model's handler is obtained via getForModelID(), so
 * errors raised by that method propagate from here.
 *
 * @return string[] Supported formats with duplicates removed. Keys are
 *   whatever array_unique() leaves behind and need not be consecutive.
 */
public static function getAllContentFormats() {
	global $wgContentHandlers;

	$formats = [];

	foreach ( array_keys( $wgContentHandlers ) as $model ) {
		$supported = self::getForModelID( $model )->getSupportedFormats();
		$formats = array_merge( $formats, $supported );
	}

	return array_unique( $formats );
}
431
432 // ------------------------------------------------------------------------
433
/**
 * @var string ID of the content model handled by this instance
 */
protected $mModelID;

/**
 * @var string[] Serialization formats supported by this instance
 */
protected $mSupportedFormats;

/**
 * Stores the model ID and the list of supported formats on the new
 * ContentHandler instance. Values for the parameters are typically
 * provided as literals by subclass's constructors.
 *
 * @param string $modelId (use CONTENT_MODEL_XXX constants).
 * @param string[] $formats List for supported serialization formats
 *   (typically as MIME types)
 */
public function __construct( $modelId, $formats ) {
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
457
/**
 * Turns a Content object of the type supported by this ContentHandler
 * into its serialized (string) form. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @param Content $content The Content object to serialize
 * @param string|null $format The desired serialization format
 *
 * @return string Serialized form of the content
 */
abstract public function serializeContent( Content $content, $format = null );
469
/**
 * Hook point for transforming a blob on export. This base implementation
 * hands the blob back untouched and ignores $format.
 *
 * Subclasses may override this to perform transformations such as conversion
 * of legacy formats or filtering of internal meta-data.
 *
 * @param string $blob The blob to be exported
 * @param string|null $format The blob's serialization format
 *
 * @return string The (here: unchanged) blob
 */
public function exportTransform( $blob, $format = null ) {
	return $blob;
}
483
/**
 * Turns a serialized blob back into a Content object of the type supported
 * by this ContentHandler. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @param string $blob Serialized form of the content
 * @param string|null $format The format used for serialization
 *
 * @return Content The Content object created by deserializing $blob
 */
abstract public function unserializeContent( $blob, $format = null );
495
/**
 * Hook point for transforming a blob on import. This base implementation
 * hands the blob back untouched and ignores $format; subclasses may
 * override it to transform data blobs on import.
 *
 * @since 1.24
 *
 * @param string $blob
 * @param string|null $format
 *
 * @return string The (here: unchanged) blob
 */
public function importTransform( $blob, $format = null ) {
	return $blob;
}
510
/**
 * Produces an empty Content object of the type supported by this
 * ContentHandler. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @return Content
 */
abstract public function makeEmptyContent();
520
/**
 * Creates a new Content object that acts as a redirect to the given page,
 * or null if redirects are not supported by this content model.
 *
 * This base implementation ignores both arguments and always returns null.
 * Subclasses supporting redirects must override this method.
 *
 * Note that subclasses that override this method to return a Content object
 * should also override supportsRedirects() to return true.
 *
 * @since 1.21
 *
 * @param Title $destination The page to redirect to.
 * @param string $text Text to include in the redirect, if possible.
 *
 * @return Content|null Always null in this implementation.
 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	return null;
}
541
/**
 * Returns the ID of the content model this ContentHandler was constructed
 * for. Compare it against the CONTENT_MODEL_XXX constants.
 *
 * @since 1.21
 *
 * @return string The model ID
 */
public function getModelID() {
	return $this->mModelID;
}
553
/**
 * Asserts that $model_id is exactly (===) the model ID this ContentHandler
 * was constructed for.
 *
 * @since 1.21
 *
 * @param string $model_id The model to check
 *
 * @throws MWException If the model ID is not the ID of the content model supported by this
 *   ContentHandler.
 */
protected function checkModelID( $model_id ) {
	if ( $model_id === $this->mModelID ) {
		return;
	}

	throw new MWException(
		"Bad content model: expected {$this->mModelID} but got $model_id."
	);
}
569
/**
 * Returns the list of serialization formats that was passed to the
 * constructor, i.e. the formats this ContentHandler declares for its
 * serializeContent() and unserializeContent() methods.
 *
 * @since 1.21
 *
 * @return string[] List of serialization formats as MIME type like strings
 */
public function getSupportedFormats() {
	return $this->mSupportedFormats;
}
582
/**
 * The format used for serialization/deserialization by default by this
 * ContentHandler.
 *
 * This base implementation returns the element at index 0 of the format
 * list that was passed to the constructor.
 *
 * @since 1.21
 *
 * @return string The name of the default serialization format as a MIME type
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
597
/**
 * Tells whether $format is one of the serialization formats supported by
 * this ContentHandler.
 *
 * Any falsy $format (null in particular) is accepted and yields true,
 * because it means "use the default format". Other values are looked up
 * in the supported-formats list with a loose in_array() comparison.
 *
 * @since 1.21
 *
 * @param string|null $format The serialization format to check
 *
 * @return bool
 */
public function isSupportedFormat( $format ) {
	// A missing format means "use the default", which is always supported.
	return !$format || in_array( $format, $this->mSupportedFormats );
}
618
/**
 * Guard for methods that receive a serialization format as a parameter:
 * does nothing if isSupportedFormat() accepts $format, throws otherwise.
 *
 * @param string|null $format The serialization format to check
 *
 * @throws MWException If the format is not supported by this content handler.
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	$modelId = $this->getModelID();

	throw new MWException( "Format $format is not supported for content model " . $modelId );
}
634
/**
 * Returns overrides for action handlers.
 * Classes listed here will be used instead of the default one when
 * (and only when) $wgActions[$action] === true. This allows subclasses
 * to override the default action handlers.
 *
 * This base implementation declares no overrides.
 *
 * @since 1.21
 *
 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
 *   either the full qualified class name of an Action class, a callable taking ( Page $page,
 *   IContextSource $context = null ) as parameters and returning an Action object, or an actual
 *   Action object. Always an empty array here.
 *
 * @see Action::factory
 */
public function getActionOverrides() {
	$overrides = [];

	return $overrides;
}
653
/**
 * Factory for creating an appropriate DifferenceEngine for this content model.
 *
 * The GetDifferenceEngine hook runs first. If a handler aborts it, whatever
 * the hook left in its by-reference engine argument is returned. Otherwise
 * the class named by getDiffEngineClass() is instantiated.
 *
 * @since 1.21
 *
 * @param IContextSource $context Context to use, anything else will be ignored.
 * @param int $old Revision ID we want to show and diff with.
 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0. Not passed to
 *   the hook, but still forwarded to the engine's constructor.
 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
 *
 * @return DifferenceEngine
 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
	$rcid = 0, // FIXME: Deprecated, no longer used
	$refreshCache = false, $unhide = false
) {
	// Let hook handlers supply their own engine.
	$differenceEngine = null;
	$hookCompleted = Hooks::run(
		'GetDifferenceEngine',
		[ $context, $old, $new, $refreshCache, $unhide, &$differenceEngine ]
	);

	if ( !$hookCompleted ) {
		return $differenceEngine;
	}

	$engineClass = $this->getDiffEngineClass();

	return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
}
682
/**
 * Get the language in which the content of the given page is written.
 *
 * This base implementation starts from $wgContLang. For pages in the
 * MediaWiki namespace the language is instead taken from what
 * MessageCache::figureMessage() reports for the page name. The
 * PageContentLanguage hook may then replace the result, which is finally
 * passed through wfGetLangObj(). The $content argument is not used here.
 *
 * Note that the pages language is not cacheable, since it may in some
 * cases depend on user settings.
 *
 * Also note that the page language may or may not depend on the actual content of the page,
 * that is, subclasses may load the content in order to determine the language.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *
 * @return Language The page's language
 */
public function getPageLanguage( Title $title, Content $content = null ) {
	global $wgContLang, $wgLang;

	$pageLang = $wgContLang;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Parse mediawiki messages with correct target language;
		// only the second element of the returned pair is needed.
		$figured = MessageCache::singleton()->figureMessage( $title->getText() );
		$pageLang = wfGetLangObj( $figured[1] );
	}

	Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

	return wfGetLangObj( $pageLang );
}
716
/**
 * Get the language in which the content of this page is written when
 * viewed by user.
 *
 * This base implementation returns $this->getPageLanguage( $title, $content ),
 * except that outside the MediaWiki namespace a preferred variant differing
 * from the page language's own code is turned into a Language object for
 * that variant.
 *
 * Note that the pages view language is not cacheable, since it depends on user settings.
 *
 * Also note that the page language may or may not depend on the actual content of the page,
 * that is, this method may load the content in order to determine the language.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *
 * @return Language The page's language for viewing
 */
public function getPageViewLanguage( Title $title, Content $content = null ) {
	$pageLang = $this->getPageLanguage( $title, $content );

	if ( $title->getNamespace() === NS_MEDIAWIKI ) {
		return $pageLang;
	}

	// If the user chooses a variant, the content is actually
	// in a language whose code is the variant code.
	$variant = $pageLang->getPreferredVariant();

	return $pageLang->getCode() !== $variant
		? Language::factory( $variant )
		: $pageLang;
}
751
/**
 * Determines whether the content type handled by this ContentHandler
 * can be used on the given page.
 *
 * This base implementation answers true unless a handler of the
 * ContentModelCanBeUsedOn hook changes the by-reference result.
 * Subclasses may override this to restrict the use of this content model to specific locations,
 * typically based on the namespace or some other aspect of the title, such as a special suffix
 * (e.g. ".svg" for SVG content).
 *
 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
 *   content model can be used where.
 *
 * @param Title $title The page's title.
 *
 * @return bool True if content of this kind can be used on the given page, false otherwise.
 */
public function canBeUsedOn( Title $title ) {
	$allowed = true;

	Hooks::run( 'ContentModelCanBeUsedOn', [ $this->getModelID(), $title, &$allowed ] );

	return $allowed;
}
775
/**
 * Returns the class name of the diff engine that createDifferenceEngine()
 * instantiates. This base implementation names DifferenceEngine itself.
 *
 * @since 1.21
 *
 * @return string
 */
protected function getDiffEngineClass() {
	return DifferenceEngine::class;
}
786
/**
 * Attempts to merge differences between three versions. Returns a new
 * Content object for a clean merge and false for failure or a conflict.
 *
 * This base implementation never attempts a merge: it ignores its
 * arguments and always returns false.
 *
 * @since 1.21
 *
 * @param Content $oldContent The page's previous content.
 * @param Content $myContent One of the page's conflicting contents.
 * @param Content $yourContent One of the page's conflicting contents.
 *
 * @return Content|bool Always false.
 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	return false;
}
804
/**
 * Return an applicable auto-summary if one exists for the given edit.
 *
 * The cases are checked in this order, and the first match wins:
 * - the new content is a redirect whose target (or fragment) differs from
 *   the old content's redirect target: 'autoredircomment';
 * - EDIT_NEW is set and the new content has a size above zero: 'autosumm-new';
 * - old content of non-zero size was replaced by content of size zero:
 *   'autosumm-blank';
 * - the old content is more than ten times the size of the new content and
 *   the new content is smaller than 500: 'autosumm-replace';
 * - EDIT_NEW is set and the new content is empty: 'autosumm-newblank'.
 *
 * All messages are rendered in the content language.
 *
 * @since 1.21
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page. If null,
 *   no auto-summary applies and the empty string is returned.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string An appropriate auto-summary, or an empty string.
 */
public function getAutosummary( Content $oldContent = null, Content $newContent = null,
	$flags
) {
	// The signature allows a null $newContent, but every summary below needs
	// to inspect it. Bail out instead of calling methods on null.
	if ( $newContent === null ) {
		return '';
	}

	// Redirect auto-summaries

	/**
	 * @var $oldTarget Title
	 * @var $newTarget Title
	 */
	$oldTarget = $oldContent !== null ? $oldContent->getRedirectTarget() : null;
	$newTarget = $newContent->getRedirectTarget();

	if ( is_object( $newTarget ) ) {
		if ( !is_object( $oldTarget )
			|| !$newTarget->equals( $oldTarget )
			|| $oldTarget->getFragment() != $newTarget->getFragment()
		) {
			// Budget for the quoted text: 250 minus the message and the target title.
			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
				- strlen( $newTarget->getFullText() ) );

			return wfMessage( 'autoredircomment', $newTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		}
	}

	$isNewPage = (bool)( $flags & EDIT_NEW );

	// New page auto-summaries
	if ( $isNewPage && $newContent->getSize() > 0 ) {
		// If they're making a new article, give its text, truncated, in
		// the summary.
		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

		return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
			->inContentLanguage()->text();
	}

	if ( $oldContent !== null ) {
		// Blanking auto-summaries
		if ( $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) {
			return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
		}

		// Removing more than 90% of the article
		if ( $oldContent->getSize() > 10 * $newContent->getSize()
			&& $newContent->getSize() < 500
		) {
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		}
	}

	// New blank article auto-summary
	if ( $isNewPage && $newContent->isEmpty() ) {
		return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
	}

	// If we reach this point, there's no applicable auto-summary for our
	// case, so our auto-summary is empty.
	return '';
}
882
/**
 * Auto-generates a deletion reason from the page's latest content and its
 * recent contributors.
 *
 * If the latest revision has no content or is empty and a previous revision
 * exists, the previous revision's content is quoted instead. Up to 20
 * revisions (excluding those whose user is hidden) are scanned to find out
 * whether the page has a single author.
 *
 * @since 1.21
 *
 * @param Title $title The page's title
 * @param bool &$hasHistory Set to whether the scan found more than one revision.
 *   Left untouched on the early "false" returns.
 *
 * @throws MWException Via checkModelID(), if the revision's content model is
 *   not the one handled by this ContentHandler.
 * @return mixed String containing deletion reason or empty string, or
 *   boolean false if no revision occurred
 *
 * @todo &$hasHistory is extremely ugly, it's here because
 *   WikiPage::getAutoDeleteReason() and Article::generateReason()
 *   have it / want it.
 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbr = wfGetDB( DB_REPLICA );

	// Get the last revision
	$rev = Revision::newFromTitle( $title );
	if ( $rev === null ) {
		return false;
	}

	// Get the article's contents
	$content = $rev->getContent();
	$blank = false;

	// If the page is blank, use the text from the previous revision,
	// which can only be blank if there's a move/import/protect dummy
	// revision involved
	if ( !$content || $content->isEmpty() ) {
		$prev = $rev->getPrevious();

		if ( $prev ) {
			$rev = $prev;
			$content = $rev->getContent();
			$blank = true;
		}
	}

	$this->checkModelID( $rev->getContentModel() );

	// Find out if there was only one contributor
	// Only scan the last 20 revisions
	$conds = [
		'rev_page' => $title->getArticleID(),
		$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
	];
	$res = $dbr->select( 'revision', 'rev_user_text', $conds, __METHOD__, [ 'LIMIT' => 20 ] );

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );
	$firstRow = $dbr->fetchObject( $res );

	// $firstRow is false if the only contributor is hidden
	$onlyAuthor = false;
	if ( $firstRow ) {
		$onlyAuthor = $firstRow->rev_user_text;

		// Try to find a second contributor
		foreach ( $res as $otherRow ) {
			if ( $otherRow->rev_user_text != $onlyAuthor ) { // Bug 22999
				$onlyAuthor = false;
				break;
			}
		}
	}

	// Generate the summary with a '$1' placeholder
	if ( $blank ) {
		// The current revision was blank, so the text quoted in the
		// summary comes from the revision before it.
		$reason = wfMessage( 'exbeforeblank', '$1' )->inContentLanguage()->text();
	} elseif ( $onlyAuthor ) {
		$reason = wfMessage(
			'excontentauthor',
			'$1',
			$onlyAuthor
		)->inContentLanguage()->text();
	} else {
		$reason = wfMessage( 'excontent', '$1' )->inContentLanguage()->text();
	}

	if ( $reason == '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$maxLength = 255 - ( strlen( $reason ) - 2 );
	$text = $content ? $content->getTextForSummary( $maxLength ) : '';

	// Now replace the '$1' placeholder
	return str_replace( '$1', $text, $reason );
}
989
/**
 * Get the Content object that needs to be saved in order to undo all revisions
 * between $undo and $undoafter. Revisions must belong to the same page,
 * must exist and must not be deleted.
 *
 * @since 1.21
 *
 * @param Revision $current The current text
 * @param Revision $undo The revision to undo
 * @param Revision $undoafter Must be an earlier revision than $undo
 *
 * @return mixed The content to save on success: $undoafter's content when
 *   the current content equals $undo's content, otherwise whatever merge3()
 *   returns. False when any of the three revisions has no content or a
 *   content model check fails.
 */
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
	$cur_content = $current->getContent();

	if ( !$cur_content ) {
		// no page
		return false;
	}

	$undo_content = $undo->getContent();
	$undoafter_content = $undoafter->getContent();

	if ( !( $undo_content && $undoafter_content ) ) {
		// no content to undo
		return false;
	}

	try {
		$this->checkModelID( $cur_content->getModel() );
		$this->checkModelID( $undo_content->getModel() );

		// Undoing the most recent revision may revert a content model
		// change. Undoing a revision in the middle may not, because the
		// result would be confusing, so check $undoafter's model too.
		$undoingLatest = ( $current->getId() === $undo->getId() );
		if ( !$undoingLatest ) {
			$this->checkModelID( $undoafter_content->getModel() );
		}
	} catch ( MWException $e ) {
		// The revisions have different content models
		return false;
	}

	// A straight revert needs no merge; anything else goes through merge3().
	return $cur_content->equals( $undo_content )
		? $undoafter_content
		: $this->merge3( $undo_content, $undoafter_content, $cur_content );
}
1042
/**
 * Get parser options suitable for rendering and caching the article
 *
 * @param IContextSource|User|string $context One of the following:
 *  - IContextSource: Use the User and the Language of the provided
 *    context
 *  - User: Use the provided User object and $wgLang for the language,
 *    so use an IContextSource object if possible.
 *  - 'canonical': Canonical options (anonymous user with default
 *    preferences and content language).
 *
 * @throws MWException If $context is none of the supported values
 * @return ParserOptions Options with limit reporting set from
 *   $wgEnableParserLimitReporting and tidy enabled
 */
public function makeParserOptions( $context ) {
	global $wgContLang, $wgEnableParserLimitReporting;

	if ( $context instanceof IContextSource ) {
		$options = ParserOptions::newFromContext( $context );
	} elseif ( $context instanceof User ) { // settings per user (even anons)
		$options = ParserOptions::newFromUser( $context );
	} elseif ( $context === 'canonical' ) { // canonical settings
		$options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
	} else {
		// Describe the bad value without string-casting it blindly:
		// interpolating an object that has no __toString() raises an error of
		// its own, and an array triggers an "Array to string conversion"
		// notice, either of which would mask the intended MWException.
		if ( is_object( $context ) ) {
			$described = get_class( $context );
		} elseif ( is_scalar( $context ) ) {
			$described = (string)$context;
		} else {
			$described = gettype( $context );
		}

		throw new MWException( "Bad context for parser options: $described" );
	}

	$options->enableLimitReport( $wgEnableParserLimitReporting ); // show inclusion/loop reports
	$options->setTidy( true ); // fix bad HTML

	return $options;
}
1075
/**
 * Whether content of this model can be cached through the ParserCache
 * mechanism. See WikiPage::shouldCheckParserCache().
 *
 * This base implementation reports no support.
 *
 * @since 1.21
 *
 * @return bool Always false.
 */
public function isParserCacheSupported() {
	return false;
}
1087
/**
 * Whether this content model supports sections.
 * This default implementation returns false.
 *
 * Content models that return true here should also implement
 * Content::getSection, Content::replaceSection, etc. to handle sections.
 *
 * @return bool Always false.
 */
public function supportsSections() {
	return false;
}
1100
/**
 * Whether this content model supports categories.
 * This default implementation returns true.
 *
 * @return bool Always true.
 */
public function supportsCategories() {
	return true;
}
1110
/**
 * Whether this content model supports redirects.
 * This default implementation returns false.
 *
 * Content models that return true here should also implement
 * ContentHandler::makeRedirectContent to return a Content object.
 *
 * @return bool Always false.
 */
public function supportsRedirects() {
	return false;
}
1123
/**
 * Whether this content model supports direct editing, such as via EditPage.
 *
 * @return bool Default is false, and true for TextContent and its derivatives.
 */
public function supportsDirectEditing() {
	return false;
}
1132
/**
 * Whether this content model supports direct editing via ApiEditPage.
 *
 * This base implementation defers to supportsDirectEditing().
 *
 * @return bool Default is false, and true for TextContent and derivatives.
 */
public function supportsDirectApiEditing() {
	$directlyEditable = $this->supportsDirectEditing();

	return $directlyEditable;
}
1141
/**
 * Forwards a deprecation warning to wfDeprecated(), but only if
 * self::$enableDeprecationWarnings is set to true. Does nothing otherwise.
 *
 * @param string $func The name of the deprecated function
 * @param string $version The version since the method is deprecated. Usually 1.21
 *   for ContentHandler related stuff.
 * @param string|bool $component Component to which the function belongs.
 *   If false, it is assumed the function is in MediaWiki core.
 *
 * @see ContentHandler::$enableDeprecationWarnings
 * @see wfDeprecated
 */
public static function deprecated( $func, $version, $component = false ) {
	if ( !self::$enableDeprecationWarnings ) {
		return;
	}

	// The last argument (3) is passed through to wfDeprecated() unchanged.
	wfDeprecated( $func, $version, $component, 3 );
}
1160
/**
 * Call a legacy hook that uses text instead of Content objects.
 *
 * Every Content object in $args is replaced by its serialized text before
 * the hook runs. Afterwards each of those slots is set back to a Content
 * object: the original one if the text is unchanged, otherwise a new one
 * unserialized from the modified text by the original object's handler.
 *
 * NOTE(review): $args is received by value, so the write-back presumably
 * only reaches the caller for elements the caller placed into the array
 * by reference; confirm against callers.
 *
 * @param string $event Event name
 * @param array $args Parameters passed to hook functions
 * @param string|null $deprecatedVersion Forwarded to Hooks::run() so that a
 *   deprecation notice can be emitted for the provided version
 *
 * @return bool True if no handler aborted the hook
 */
public static function runLegacyHooks( $event, $args = [],
	$deprecatedVersion = null
) {
	if ( !Hooks::isRegistered( $event ) ) {
		// nothing to do here
		return true;
	}

	// Content arguments and their serialized form, keyed like $args
	$originals = [];
	$serialized = [];

	foreach ( $args as $key => $arg ) {
		if ( !( $arg instanceof Content ) ) {
			continue;
		}

		/* @var Content $arg */
		$originals[$key] = $arg;

		$text = $arg->serialize();
		$serialized[$key] = $text;
		$args[$key] = $text;
	}

	// call the hook functions
	$ok = Hooks::run( $event, $args, $deprecatedVersion );

	// Put Content objects back, rebuilding those whose text the hook changed
	foreach ( $serialized as $key => $before ) {
		/* @var Content $restored */
		$after = $args[$key];
		$restored = $originals[$key];

		if ( $after !== $before ) {
			$restored = $restored->getContentHandler()->unserializeContent( $after );
		}

		$args[$key] = $restored;
	}

	return $ok;
}
1219
/**
 * Get fields definition for search index.
 *
 * Defines four fields: 'category' (text, case-folded), 'external_link' and
 * 'outgoing_link' (keyword), and 'template' (keyword, case-folded).
 *
 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
 * field mappings here. (see T142670 and T143409)
 *
 * @param SearchEngine $engine
 * @return SearchIndexField[] List of fields this content handler can provide.
 * @since 1.28
 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	// Field name => index type and whether the case-fold flag is set
	$definitions = [
		'category' => [
			'type' => SearchIndexField::INDEX_TYPE_TEXT,
			'casefold' => true,
		],
		'external_link' => [
			'type' => SearchIndexField::INDEX_TYPE_KEYWORD,
			'casefold' => false,
		],
		'outgoing_link' => [
			'type' => SearchIndexField::INDEX_TYPE_KEYWORD,
			'casefold' => false,
		],
		'template' => [
			'type' => SearchIndexField::INDEX_TYPE_KEYWORD,
			'casefold' => true,
		],
	];

	$fields = [];

	foreach ( $definitions as $name => $definition ) {
		$fields[$name] = $engine->makeSearchFieldMapping( $name, $definition['type'] );

		if ( $definition['casefold'] ) {
			$fields[$name]->setFlag( SearchIndexField::FLAG_CASEFOLD );
		}
	}

	return $fields;
}
1257
/**
 * Add new field definition to array.
 *
 * The mapping is created by the search engine and stored under $name in
 * $fields, which is modified in place and also returned.
 *
 * @param SearchIndexField[] &$fields
 * @param SearchEngine $engine
 * @param string $name
 * @param int $type
 * @return SearchIndexField[] new field defs
 * @since 1.28
 */
protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
	$mapping = $engine->makeSearchFieldMapping( $name, $type );
	$fields[$name] = $mapping;

	return $fields;
}
1271
/**
 * Return fields to be indexed by search engine
 * as representation of this document.
 *
 * When the page has content, the result holds 'category', 'external_link',
 * 'outgoing_link' and 'template' taken from the parser output, plus 'text',
 * 'source_text' and 'text_bytes' taken from the content. The
 * SearchDataForIndex hook is always run and may change the data.
 *
 * Overriding class should call parent function or take care of calling
 * the SearchDataForIndex hook.
 * @param WikiPage $page Page to index
 * @param ParserOutput $output
 * @param SearchEngine $engine Search engine for which we are indexing
 * @return array Map of name=>value for fields
 * @since 1.28
 */
public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
	SearchEngine $engine ) {
	$fieldData = [];
	$content = $page->getContent();

	if ( $content ) {
		$extractor = new ParserOutputSearchDataExtractor();

		$fieldData = [
			'category' => $extractor->getCategories( $output ),
			'external_link' => $extractor->getExternalLinks( $output ),
			'outgoing_link' => $extractor->getOutgoingLinks( $output ),
			'template' => $extractor->getTemplates( $output ),
		];

		// The same text is used for both the 'text' and 'source_text' fields
		$indexText = $content->getTextForSearchIndex();

		$fieldData['text'] = $indexText;
		$fieldData['source_text'] = $indexText;
		$fieldData['text_bytes'] = $content->getSize();
	}

	// $fieldData is handed over by reference so handlers can alter it
	Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

	return $fieldData;
}
1306
/**
 * Produce page output suitable for indexing.
 *
 * Uses canonical parser options. If a cache is given it is consulted first;
 * on a miss, or without a cache, the page content is parsed and the result
 * is saved to the cache when one was given.
 *
 * Specific content handlers may override it if they need different content handling.
 *
 * NOTE(review): the revision ID is read before the cache lookup, and neither
 * $page->getRevision() nor $page->getContent() is checked for a missing
 * value; presumably callers only pass existing pages. Confirm.
 *
 * @param WikiPage $page
 * @param ParserCache $cache
 * @return ParserOutput
 */
public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
	$parserOptions = $page->makeParserOptions( 'canonical' );
	$revId = $page->getRevision()->getId();

	$parserOutput = null;
	if ( $cache ) {
		$parserOutput = $cache->get( $page, $parserOptions );
	}

	if ( !$parserOutput ) {
		// Nothing usable from the cache: parse the content now
		$content = $page->getContent();
		$parserOutput = $content->getParserOutput( $page->getTitle(), $revId, $parserOptions );

		if ( $cache ) {
			$cache->save( $parserOutput, $page, $parserOptions );
		}
	}

	return $parserOutput;
}
1331
1332 }