Merge "Add CollationFa"
[lhc/web/wiklou.git] / includes / content / ContentHandler.php
1 <?php
2
3 use MediaWiki\Search\ParserOutputSearchDataExtractor;
4
5 /**
6 * Base class for content handling.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @since 1.21
24 *
25 * @file
26 * @ingroup Content
27 *
28 * @author Daniel Kinzler
29 */
30
31 /**
32 * Exception representing a failure to serialize or unserialize a content object.
33 *
34 * @ingroup Content
35 */
class MWContentSerializationException extends MWException {
	// Deliberately empty: the class adds nothing to MWException beyond its
	// own type, which marks a failure to serialize or unserialize content.
}
38
39 /**
40 * Exception thrown when an unregistered content model is requested. This error
41 * can be triggered by user input, so a separate exception class is provided so
42 * callers can substitute a context-specific, internationalised error message.
43 *
44 * @ingroup Content
45 * @since 1.27
46 */
class MWUnknownContentModelException extends MWException {
	/** @var string The name of the unknown content model */
	private $modelId;

	/**
	 * Builds the exception with a fixed (non-localised) message that names
	 * the unknown model, and remembers the model ID for later retrieval.
	 *
	 * @param string $modelId The content model ID that is not registered
	 */
	public function __construct( $modelId ) {
		parent::__construct( "The content model '$modelId' is not registered on this wiki.\n" .
			'See https://www.mediawiki.org/wiki/Content_handlers to find out which extensions ' .
			'handle this content model.' );
		$this->modelId = $modelId;
	}

	/**
	 * @return string The model ID that was passed to the constructor
	 */
	public function getModelId() {
		return $this->modelId;
	}
}
64
65 /**
66 * A content handler knows how to deal with a specific type of content on a wiki
67 * page. Content is stored in the database in a serialized form (using a
68 * serialization format a.k.a. MIME type) and is unserialized into its native
69 * PHP representation (the content model), which is wrapped in an instance of
70 * the appropriate subclass of Content.
71 *
72 * ContentHandler instances are stateless singletons that serve, among other
73 * things, as a factory for Content objects. Generally, there is one subclass
74 * of ContentHandler and one subclass of Content for every type of content model.
75 *
76 * Some content types have a flat model, that is, their native representation
77 * is the same as their serialized form. Examples would be JavaScript and CSS
78 * code. As of now, this also applies to wikitext (MediaWiki's default content
79 * type), but wikitext content may be represented by a DOM or AST structure in
80 * the future.
81 *
82 * @ingroup Content
83 */
84 abstract class ContentHandler {
85 /**
86 * Convenience function for getting flat text from a Content object. This
87 * should only be used in the context of backwards compatibility with code
88 * that is not yet able to handle Content objects!
89 *
90 * If $content is null, this method returns the empty string.
91 *
92 * If $content is an instance of TextContent, this method returns the flat
93 * text as returned by $content->getNativeData().
94 *
95 * If $content is not a TextContent object, the behavior of this method
96 * depends on the global $wgContentHandlerTextFallback:
97 * - If $wgContentHandlerTextFallback is 'fail' and $content is not a
98 * TextContent object, an MWException is thrown.
99 * - If $wgContentHandlerTextFallback is 'serialize' and $content is not a
100 * TextContent object, $content->serialize() is called to get a string
101 * form of the content.
102 * - If $wgContentHandlerTextFallback is 'ignore' and $content is not a
103 * TextContent object, this method returns null.
104 * - otherwise, the behavior is undefined.
105 *
106 * @since 1.21
107 *
108 * @param Content $content
109 *
110 * @throws MWException If the content is not an instance of TextContent and
111 * wgContentHandlerTextFallback was set to 'fail'.
112 * @return string|null Textual form of the content, if available.
113 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	// No content at all maps to the empty string, not to null.
	if ( $content === null ) {
		return '';
	}

	// Text-based content hands out its native data directly.
	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

	// Anything else is governed by the configured fallback mode. switch
	// compares loosely, exactly like an == chain would.
	switch ( $wgContentHandlerTextFallback ) {
		case 'fail':
			throw new MWException(
				"Attempt to get text from Content with model " .
				$content->getModel()
			);
		case 'serialize':
			return $content->serialize();
		default:
			// 'ignore' and any unrecognised mode end up here.
			return null;
	}
}
140
141 /**
142 * Convenience function for creating a Content object from a given textual
143 * representation.
144 *
145 * $text will be deserialized into a Content object of the model specified
146 * by $modelId (or, if that is not given, $title->getContentModel()) using
147 * the given format.
148 *
149 * @since 1.21
150 *
151 * @param string $text The textual representation, will be
152 * unserialized to create the Content object
153 * @param Title $title The title of the page this text belongs to.
154 * Required if $modelId is not provided.
155 * @param string $modelId The model to deserialize to. If not provided,
156 * $title->getContentModel() is used.
157 * @param string $format The format to use for deserialization. If not
158 * given, the model's default format is used.
159 *
160 * @throws MWException If model ID or format is not supported or if the text can not be
161 * unserialized using the format.
162 * @return Content A Content object representing the text.
163 */
public static function makeContent( $text, Title $title = null,
	$modelId = null, $format = null
) {
	// Without an explicit model, the title is the only remaining source for one.
	if ( $modelId === null ) {
		if ( $title === null ) {
			throw new MWException( "Must provide a Title object or a content model ID." );
		}

		$modelId = $title->getContentModel();
	}

	// The handler for that model does the actual deserialization.
	return self::getForModelID( $modelId )->unserializeContent( $text, $format );
}
178
179 /**
180 * Returns the name of the default content model to be used for the page
181 * with the given title.
182 *
183 * Note: There should rarely be need to call this method directly.
184 * To determine the actual content model for a given page, use
185 * Title::getContentModel().
186 *
187 * Which model is to be used by default for the page is determined based
188 * on several factors:
189 * - The global setting $wgNamespaceContentModels specifies a content model
190 * per namespace.
191 * - The hook ContentHandlerDefaultModelFor may be used to override the page's default
192 * model.
193 * - Pages in NS_MEDIAWIKI, and subpages in NS_USER, default to the JavaScript,
194 * CSS or JSON model if they end in .js, .css or .json, respectively.
195 * - Pages in NS_MEDIAWIKI default to the wikitext model otherwise.
196 * - The hook TitleIsCssOrJsPage may be used to force a page to use the CSS
197 * or JavaScript model. This is a compatibility feature. The ContentHandlerDefaultModelFor
198 * hook should be used instead if possible.
199 * - The hook TitleIsWikitextPage may be used to force a page to use the
200 * wikitext model. This is a compatibility feature. The ContentHandlerDefaultModelFor
201 * hook should be used instead if possible.
202 *
203 * If none of the above applies, the wikitext model is used.
204 *
205 * Note: this is used by, and may thus not use, Title::getContentModel()
206 *
207 * @since 1.21
208 *
209 * @param Title $title
210 *
211 * @return string Default model name for the page given by $title
212 */
public static function getDefaultModelFor( Title $title ) {
	// NOTE: nothing in here may call $title->getContentModel(), directly or
	// indirectly, because this method is used to initialize the mContentModel member.

	$namespace = $title->getNamespace();

	$extension = false;
	$matches = null;
	$model = MWNamespace::getNamespaceContentModel( $namespace );

	// A hook handler may choose the model. If it aborts the hook and has
	// set a model, that model is final.
	$hookCompleted = Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] );
	if ( !$hookCompleted && $model !== null ) {
		return $model;
	}

	// Does the title of a MediaWiki-namespace page look like code?
	$isCodePage = false;
	if ( NS_MEDIAWIKI == $namespace ) {
		$isCodePage = (bool)preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches );
	}
	if ( $isCodePage ) {
		$extension = $matches[1];
	}

	// Compatibility hook: handlers may flip $isCodePage.
	Hooks::run( 'TitleIsCssOrJsPage', [ $title, &$isCodePage ], '1.21' );

	// Does the title look like a user subpage holding code?
	$isCodeSubpage = false;
	if ( NS_USER == $namespace && !$isCodePage ) {
		$isCodeSubpage = (bool)preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches );
	}
	if ( $isCodeSubpage ) {
		$extension = $matches[1];
	}

	// Wikitext so far means: no model or the wikitext model, and no code detected.
	$isWikitext = ( $model === null || $model == CONTENT_MODEL_WIKITEXT )
		&& !$isCodePage
		&& !$isCodeSubpage;

	// Compatibility hook: handlers may override $isWikitext.
	Hooks::run( 'TitleIsWikitextPage', [ $title, &$isWikitext ], '1.21' );

	if ( $isWikitext ) {
		return CONTENT_MODEL_WIKITEXT;
	}

	// Not wikitext: map the detected extension, if any, to a code model.
	switch ( $extension ) {
		case 'js':
			return CONTENT_MODEL_JAVASCRIPT;
		case 'css':
			return CONTENT_MODEL_CSS;
		case 'json':
			return CONTENT_MODEL_JSON;
		default:
			// No recognised extension: use the namespace/hook model, or plain text.
			return $model === null ? CONTENT_MODEL_TEXT : $model;
	}
}
271
272 /**
273 * Returns the appropriate ContentHandler singleton for the given title.
274 *
275 * @since 1.21
276 *
277 * @param Title $title
278 *
279 * @return ContentHandler
280 */
public static function getForTitle( Title $title ) {
	// Resolve the handler from the content model the title reports.
	return self::getForModelID( $title->getContentModel() );
}
286
287 /**
288 * Returns the appropriate ContentHandler singleton for the given Content
289 * object.
290 *
291 * @since 1.21
292 *
293 * @param Content $content
294 *
295 * @return ContentHandler
296 */
public static function getForContent( Content $content ) {
	// Resolve the handler from the model the content object reports.
	return self::getForModelID( $content->getModel() );
}
302
303 /**
304 * @var array A Cache of ContentHandler instances by model id
305 */
306 protected static $handlers;
307
308 /**
309 * Returns the ContentHandler singleton for the given model ID. Use the
310 * CONTENT_MODEL_XXX constants to identify the desired content model.
311 *
312 * ContentHandler singletons are taken from the global $wgContentHandlers
313 * array. Keys in that array are model names, the values are either
314 * ContentHandler singleton objects, or strings specifying the appropriate
315 * subclass of ContentHandler.
316 *
317 * If a class name is encountered when looking up the singleton for a given
318 * model name, the class is instantiated and the class name is replaced by
319 * the resulting singleton in $wgContentHandlers.
320 *
321 * If no ContentHandler is defined for the desired $modelId, the
322 * ContentHandler may be provided by the ContentHandlerForModelID hook.
323 * If no ContentHandler can be determined, an MWException is raised.
324 *
325 * @since 1.21
326 *
327 * @param string $modelId The ID of the content model for which to get a
328 * handler. Use CONTENT_MODEL_XXX constants.
329 *
330 * @throws MWException For internal errors and problems in the configuration.
331 * @throws MWUnknownContentModelException If no handler is known for the model ID.
332 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
333 */
public static function getForModelID( $modelId ) {
	global $wgContentHandlers;

	// Serve the cached instance if this model was resolved before.
	if ( isset( ContentHandler::$handlers[$modelId] ) ) {
		return ContentHandler::$handlers[$modelId];
	}

	if ( empty( $wgContentHandlers[$modelId] ) ) {
		// Nothing configured: let a hook handler supply the handler.
		$handler = null;

		Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

		if ( $handler === null ) {
			throw new MWUnknownContentModelException( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
		}
	} else {
		$classOrCallback = $wgContentHandlers[$modelId];

		// Printable description of the configured value for error messages.
		// Interpolating the raw value would fail for closures and print
		// "Array" (with a notice) for array callables.
		if ( is_string( $classOrCallback ) ) {
			$specName = $classOrCallback;
		} elseif ( is_object( $classOrCallback ) ) {
			$specName = get_class( $classOrCallback );
		} else {
			$specName = gettype( $classOrCallback );
		}

		if ( $classOrCallback instanceof ContentHandler ) {
			// A ready-made handler instance was configured; use it as it is.
			$handler = $classOrCallback;
		} elseif ( is_callable( $classOrCallback ) ) {
			$handler = call_user_func( $classOrCallback, $modelId );
		} elseif ( is_string( $classOrCallback ) && class_exists( $classOrCallback ) ) {
			$handler = new $classOrCallback( $modelId );
		} else {
			// Report broken configuration as an exception rather than
			// letting `new` fail on a missing class or a non-string value.
			throw new MWException( "$specName from \$wgContentHandlers is neither " .
				"a callable nor the name of an existing class" );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "$specName from \$wgContentHandlers is not " .
				"compatible with ContentHandler" );
		}
	}

	wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
		. ': ' . get_class( $handler ) );

	// Cache the instance so later lookups for this model return the same object.
	ContentHandler::$handlers[$modelId] = $handler;

	return ContentHandler::$handlers[$modelId];
}
375
376 /**
377 * Returns the localized name for a given content model.
378 *
379 * Model names are localized using system messages. Message keys
380 * have the form content-model-$name, where $name is getContentModelName( $id ).
381 *
382 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
383 * constant or returned by Revision::getContentModel().
384 * @param Language|null $lang The language to parse the message in (since 1.26)
385 *
386 * @return string The content model's localized name, or $name itself if the
387 * content-model-$name message does not exist.
388 */
public static function getLocalizedName( $name, Language $lang = null ) {
	// Messages: content-model-wikitext, content-model-text,
	// content-model-javascript, content-model-css
	$message = wfMessage( "content-model-$name" );

	if ( $lang !== null ) {
		$message->inLanguage( $lang );
	}

	// No such message: fall back to the raw model name.
	if ( !$message->exists() ) {
		return $name;
	}

	return $message->plain();
}
401
/**
 * Lists the model IDs that have an entry in $wgContentHandlers.
 *
 * @return array The keys of $wgContentHandlers
 */
public static function getContentModels() {
	global $wgContentHandlers;

	$modelIds = array_keys( $wgContentHandlers );

	return $modelIds;
}
407
/**
 * Collects the serialization formats supported by the handlers of all
 * models that have an entry in $wgContentHandlers.
 *
 * @return array The supported formats with duplicates removed
 */
public static function getAllContentFormats() {
	global $wgContentHandlers;

	$allFormats = [];

	// Only the model IDs matter here; each handler is resolved through getForModelID().
	foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
		$supported = self::getForModelID( $modelId )->getSupportedFormats();
		$allFormats = array_merge( $allFormats, $supported );
	}

	return array_unique( $allFormats );
}
422
423 // ------------------------------------------------------------------------
424
425 /**
426 * @var string
427 */
428 protected $mModelID;
429
430 /**
431 * @var string[]
432 */
433 protected $mSupportedFormats;
434
435 /**
436 * Constructor, initializing the ContentHandler instance with its model ID
437 * and a list of supported formats. Values for the parameters are typically
438 * provided as literals by subclass's constructors.
439 *
440 * @param string $modelId (use CONTENT_MODEL_XXX constants).
441 * @param string[] $formats List for supported serialization formats
442 * (typically as MIME types)
443 */
public function __construct( $modelId, $formats ) {
	// Both values are stored exactly as given; nothing is validated here.
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
448
449 /**
450 * Serializes a Content object of the type supported by this ContentHandler.
451 *
452 * @since 1.21
453 *
454 * @param Content $content The Content object to serialize
455 * @param string $format The desired serialization format
456 *
457 * @return string Serialized form of the content
458 */
abstract public function serializeContent(
	Content $content,
	$format = null
);
460
461 /**
462 * Applies transformations on export (returns the blob unchanged per default).
463 * Subclasses may override this to perform transformations such as conversion
464 * of legacy formats or filtering of internal meta-data.
465 *
466 * @param string $blob The blob to be exported
467 * @param string|null $format The blob's serialization format
468 *
469 * @return string
470 */
public function exportTransform( $blob, $format = null ) {
	// Identity transform: $format is accepted but not used by this default.
	return $blob;
}
474
475 /**
476 * Unserializes a Content object of the type supported by this ContentHandler.
477 *
478 * @since 1.21
479 *
480 * @param string $blob Serialized form of the content
481 * @param string $format The format used for serialization
482 *
483 * @return Content The Content object created by deserializing $blob
484 */
abstract public function unserializeContent(
	$blob,
	$format = null
);
486
487 /**
488 * Apply import transformation (per default, returns $blob unchanged).
489 * This gives subclasses an opportunity to transform data blobs on import.
490 *
491 * @since 1.24
492 *
493 * @param string $blob
494 * @param string|null $format
495 *
496 * @return string
497 */
public function importTransform( $blob, $format = null ) {
	// Identity transform: $format is accepted but not used by this default.
	return $blob;
}
501
502 /**
503 * Creates an empty Content object of the type supported by this
504 * ContentHandler.
505 *
506 * @since 1.21
507 *
508 * @return Content
509 */
abstract public function makeEmptyContent(); // no default: each concrete handler must implement this
511
512 /**
513 * Creates a new Content object that acts as a redirect to the given page,
514 * or null if redirects are not supported by this content model.
515 *
516 * This default implementation always returns null. Subclasses supporting redirects
517 * must override this method.
518 *
519 * Note that subclasses that override this method to return a Content object
520 * should also override supportsRedirects() to return true.
521 *
522 * @since 1.21
523 *
524 * @param Title $destination The page to redirect to.
525 * @param string $text Text to include in the redirect, if possible.
526 *
527 * @return Content|null Always null in this default implementation.
528 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	// This default ignores both arguments and creates no redirect content.
	return null;
}
532
533 /**
534 * Returns the model id that identifies the content model this
535 * ContentHandler can handle. Use with the CONTENT_MODEL_XXX constants.
536 *
537 * @since 1.21
538 *
539 * @return string The model ID
540 */
public function getModelID() {
	// Set once in the constructor.
	$modelId = $this->mModelID;

	return $modelId;
}
544
545 /**
546 * @since 1.21
547 *
548 * @param string $model_id The model to check
549 *
550 * @throws MWException If the model ID is not the ID of the content model supported by this
551 * ContentHandler.
552 */
protected function checkModelID( $model_id ) {
	// Strict comparison: only the exact model ID of this handler passes.
	if ( $model_id === $this->mModelID ) {
		return;
	}

	throw new MWException( "Bad content model: " .
		"expected {$this->mModelID} " .
		"but got $model_id." );
}
560
561 /**
562 * Returns a list of serialization formats supported by the
563 * serializeContent() and unserializeContent() methods of this
564 * ContentHandler.
565 *
566 * @since 1.21
567 *
568 * @return string[] List of serialization formats as MIME type like strings
569 */
public function getSupportedFormats() {
	// The list given to the constructor, returned unchanged.
	$formats = $this->mSupportedFormats;

	return $formats;
}
573
574 /**
575 * The format used for serialization/deserialization by default by this
576 * ContentHandler.
577 *
578 * This default implementation will return the first element of the array
579 * of formats that was passed to the constructor.
580 *
581 * @since 1.21
582 *
583 * @return string The name of the default serialization format as a MIME type
584 */
public function getDefaultFormat() {
	// By convention the first supported format is the default one.
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
588
589 /**
590 * Returns true if $format is a serialization format supported by this
591 * ContentHandler, and false otherwise.
592 *
593 * Note that if $format is null, this method always returns true, because
594 * null means "use the default format".
595 *
596 * @since 1.21
597 *
598 * @param string $format The serialization format to check
599 *
600 * @return bool
601 */
public function isSupportedFormat( $format ) {
	// A falsy $format means "use the default" and is always acceptable;
	// otherwise the format has to be in the supported list.
	return !$format || in_array( $format, $this->mSupportedFormats );
}
609
610 /**
611 * Convenient for checking whether a format provided as a parameter is actually supported.
612 *
613 * @param string $format The serialization format to check
614 *
615 * @throws MWException If the format is not supported by this content handler.
616 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException(
		"Format $format is not supported for content model "
		. $this->getModelID()
	);
}
625
626 /**
627 * Returns overrides for action handlers.
628 * Classes listed here will be used instead of the default one when
629 * (and only when) $wgActions[$action] === true. This allows subclasses
630 * to override the default action handlers.
631 *
632 * @since 1.21
633 *
634 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
635 * either the full qualified class name of an Action class, a callable taking ( Page $page,
636 * IContextSource $context = null ) as parameters and returning an Action object, or an actual
637 * Action object. An empty array in this default implementation.
638 *
639 * @see Action::factory
640 */
public function getActionOverrides() {
	// No overrides by default.
	$overrides = [];

	return $overrides;
}
644
645 /**
646 * Factory for creating an appropriate DifferenceEngine for this content model.
647 *
648 * @since 1.21
649 *
650 * @param IContextSource $context Context to use, anything else will be ignored.
651 * @param int $old Revision ID we want to show and diff with.
652 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
653 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
654 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
655 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
656 *
657 * @return DifferenceEngine
658 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
	$rcid = 0, // FIXME: Deprecated, no longer used
	$refreshCache = false, $unhide = false
) {
	// Hook handlers may provide their own engine through $differenceEngine.
	$differenceEngine = null;
	$hookCompleted = Hooks::run( 'GetDifferenceEngine',
		[ $context, $old, $new, $refreshCache, $unhide, &$differenceEngine ]
	);

	if ( $hookCompleted ) {
		// Nobody aborted the hook: build the engine class of this content model.
		$engineClass = $this->getDiffEngineClass();

		return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
	}

	// The hook was aborted: return whatever the handler left in $differenceEngine.
	return $differenceEngine;
}
673
674 /**
675 * Get the language in which the content of the given page is written.
676 *
677 * This default implementation just returns $wgContLang (except for pages
678 * in the MediaWiki namespace)
679 *
680 * Note that the pages language is not cacheable, since it may in some
681 * cases depend on user settings.
682 *
683 * Also note that the page language may or may not depend on the actual content of the page,
684 * that is, this method may load the content in order to determine the language.
685 *
686 * @since 1.21
687 *
688 * @param Title $title The page to determine the language for.
689 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
690 *
691 * @return Language The page's language
692 */
public function getPageLanguage( Title $title, Content $content = null ) {
	global $wgContLang, $wgLang;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Parse mediawiki messages with correct target language; only the
		// second element of figureMessage()'s result is needed.
		$messageInfo = MessageCache::singleton()->figureMessage( $title->getText() );
		$pageLang = Language::factory( $messageInfo[1] );
	} else {
		$pageLang = $wgContLang;
	}

	// Hook handlers may replace $pageLang.
	Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

	return wfGetLangObj( $pageLang );
}
707
708 /**
709 * Get the language in which the content of this page is written when
710 * viewed by user. Defaults to $this->getPageLanguage(), but if the user
711 * specified a preferred variant, the variant will be used.
712 *
713 * This default implementation just returns $this->getPageLanguage( $title, $content ) unless
714 * the user specified a preferred variant.
715 *
716 * Note that the pages view language is not cacheable, since it depends on user settings.
717 *
718 * Also note that the page language may or may not depend on the actual content of the page,
719 * that is, this method may load the content in order to determine the language.
720 *
721 * @since 1.21
722 *
723 * @param Title $title The page to determine the language for.
724 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
725 *
726 * @return Language The page's language for viewing
727 */
public function getPageViewLanguage( Title $title, Content $content = null ) {
	$pageLang = $this->getPageLanguage( $title, $content );

	// Pages in the MediaWiki namespace are never switched to a variant.
	if ( $title->getNamespace() === NS_MEDIAWIKI ) {
		return $pageLang;
	}

	// If the user chooses a variant, the content is actually
	// in a language whose code is the variant code.
	$variant = $pageLang->getPreferredVariant();
	if ( $pageLang->getCode() === $variant ) {
		return $pageLang;
	}

	return Language::factory( $variant );
}
742
743 /**
744 * Determines whether the content type handled by this ContentHandler
745 * can be used on the given page.
746 *
747 * This default implementation always returns true.
748 * Subclasses may override this to restrict the use of this content model to specific locations,
749 * typically based on the namespace or some other aspect of the title, such as a special suffix
750 * (e.g. ".svg" for SVG content).
751 *
752 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
753 * content model can be used where.
754 *
755 * @param Title $title The page's title.
756 *
757 * @return bool True if content of this kind can be used on the given page, false otherwise.
758 */
public function canBeUsedOn( Title $title ) {
	// Allowed unless a hook handler changes the flag.
	$allowed = true;

	Hooks::run( 'ContentModelCanBeUsedOn', [ $this->getModelID(), $title, &$allowed ] );

	return $allowed;
}
766
767 /**
768 * Returns the name of the diff engine to use.
769 *
770 * @since 1.21
771 *
772 * @return string
773 */
protected function getDiffEngineClass() {
	// The class name is what createDifferenceEngine() instantiates.
	$engineClass = DifferenceEngine::class;

	return $engineClass;
}
777
778 /**
779 * Attempts to merge differences between three versions. Returns a new
780 * Content object for a clean merge and false for failure or a conflict.
781 *
782 * This default implementation always returns false.
783 *
784 * @since 1.21
785 *
786 * @param Content $oldContent The page's previous content.
787 * @param Content $myContent One of the page's conflicting contents.
788 * @param Content $yourContent One of the page's conflicting contents.
789 *
790 * @return Content|bool Always false.
791 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	// This default never merges: all three arguments are ignored.
	return false;
}
795
796 /**
797 * Return an applicable auto-summary if one exists for the given edit.
798 *
799 * @since 1.21
800 *
801 * @param Content $oldContent The previous text of the page.
802 * @param Content $newContent The submitted text of the page.
803 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
804 *
805 * @return string An appropriate auto-summary, or an empty string.
806 */
public function getAutosummary( Content $oldContent = null, Content $newContent = null,
	$flags
) {
	// The signature allows $newContent to be null, but every summary below
	// is derived from it. Without new content there is nothing to
	// summarise, so return early instead of failing on a null dereference.
	if ( $newContent === null ) {
		return '';
	}

	// Decide what kind of auto-summary is needed.

	// Redirect auto-summaries

	/**
	 * @var $ot Title
	 * @var $rt Title
	 */

	$ot = $oldContent !== null ? $oldContent->getRedirectTarget() : null;
	$rt = $newContent->getRedirectTarget();

	if ( is_object( $rt ) ) {
		// Only summarise when the redirect is new or its target (including
		// the fragment) has changed.
		if ( !is_object( $ot )
			|| !$rt->equals( $ot )
			|| $ot->getFragment() != $rt->getFragment()
		) {
			// Budget for the excerpt: 250 minus the byte lengths of the
			// message text and of the target title.
			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
				- strlen( $rt->getFullText() ) );

			return wfMessage( 'autoredircomment', $rt->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		}
	}

	// Explicit parentheses: the flag test is a bitwise AND.
	$isNew = (bool)( $flags & EDIT_NEW );

	// New page auto-summaries
	if ( $isNew && $newContent->getSize() > 0 ) {
		// If they're making a new article, give its text, truncated, in
		// the summary.

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

		return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
			->inContentLanguage()->text();
	}

	// Blanking auto-summaries
	if ( $oldContent !== null && $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) {
		return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
	} elseif ( $oldContent !== null
		&& $oldContent->getSize() > 10 * $newContent->getSize()
		&& $newContent->getSize() < 500
	) {
		// Removing more than 90% of the article

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

		return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
			->inContentLanguage()->text();
	}

	// New blank article auto-summary
	if ( $isNew && $newContent->isEmpty() ) {
		return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
	}

	// If we reach this point, there's no applicable auto-summary for our
	// case, so our auto-summary is empty.
	return '';
}
873
874 /**
875 * Auto-generates a deletion reason
876 *
877 * @since 1.21
878 *
879 * @param Title $title The page's title
880 * @param bool &$hasHistory Whether the page has a history
881 *
882 * @return mixed String containing deletion reason or empty string, or
883 * boolean false if no revision occurred
884 *
885 * @todo &$hasHistory is extremely ugly, it's here because
886 * WikiPage::getAutoDeleteReason() and Article::generateReason()
887 * have it / want it.
888 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbr = wfGetDB( DB_REPLICA );

	// Start from the latest revision of the page.
	$rev = Revision::newFromTitle( $title );
	if ( $rev === null ) {
		return false;
	}

	$content = $rev->getContent();
	$blank = false;

	// If the page is blank, use the text from the previous revision,
	// which can only be blank if there's a move/import/protect dummy
	// revision involved
	if ( !$content || $content->isEmpty() ) {
		$previous = $rev->getPrevious();

		if ( $previous ) {
			$rev = $previous;
			$content = $previous->getContent();
			$blank = true;
		}
	}

	$this->checkModelID( $rev->getContentModel() );

	// Find out if there was only one contributor; at most 20 revision rows
	// whose user name is not hidden are looked at.
	$conds = [
		'rev_page' => $title->getArticleID(),
		$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
	];
	$res = $dbr->select( 'revision', 'rev_user_text', $conds, __METHOD__, [ 'LIMIT' => 20 ] );

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );
	$firstRow = $dbr->fetchObject( $res );

	// $firstRow is false if the only contributor is hidden
	$onlyAuthor = false;
	if ( $firstRow ) {
		$onlyAuthor = $firstRow->rev_user_text;
		// Try to find a second contributor
		foreach ( $res as $row ) {
			if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
				$onlyAuthor = false;
				break;
			}
		}
	}

	// Generate the summary with a '$1' placeholder
	if ( $blank ) {
		// The latest revision had no content, so the text quoted below
		// comes from the revision before it.
		$reason = wfMessage( 'exbeforeblank', '$1' )->inContentLanguage()->text();
	} elseif ( $onlyAuthor ) {
		$reason = wfMessage(
			'excontentauthor',
			'$1',
			$onlyAuthor
		)->inContentLanguage()->text();
	} else {
		$reason = wfMessage( 'excontent', '$1' )->inContentLanguage()->text();
	}

	if ( $reason == '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$maxLength = 255 - ( strlen( $reason ) - 2 );
	$text = $content ? $content->getTextForSummary( $maxLength ) : '';

	// Now replace the '$1' placeholder
	return str_replace( '$1', $text, $reason );
}
980
/**
 * Work out the content that has to be saved in order to undo all revisions
 * between $undo and $undoafter. Revisions must belong to the same page,
 * must exist and must not be deleted.
 *
 * When the current content equals the content of $undo, the undo is a plain
 * revert and the content of $undoafter is returned as is. Otherwise the
 * result of merge3() on the three contents is returned.
 *
 * @since 1.21
 *
 * @param Revision $current The current revision
 * @param Revision $undo The revision to undo
 * @param Revision $undoafter Must be an earlier revision than $undo
 *
 * @return mixed The content to save on success (the content of $undoafter,
 *   or whatever merge3() returns), false on failure
 */
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
	$currentContent = $current->getContent();

	if ( !$currentContent ) {
		// No current content means there is no page to operate on
		return false;
	}

	$undoContent = $undo->getContent();
	$baseContent = $undoafter->getContent();

	if ( !( $undoContent && $baseContent ) ) {
		// One side of the undo has no content, so there is nothing to undo
		return false;
	}

	try {
		$this->checkModelID( $currentContent->getModel() );
		$this->checkModelID( $undoContent->getModel() );

		// Undoing the most recent revision may legitimately revert a content
		// model change. Undoing a revision in the middle of the history must
		// not, as that would be confusing, so the base is checked as well.
		$undoingLatest = ( $current->getId() === $undo->getId() );
		if ( !$undoingLatest ) {
			$this->checkModelID( $baseContent->getModel() );
		}
	} catch ( MWException $e ) {
		// The revisions use a content model this handler does not handle
		return false;
	}

	// A straight revert needs no merge; anything else goes through merge3()
	return $currentContent->equals( $undoContent )
		? $baseContent
		: $this->merge3( $undoContent, $baseContent, $currentContent );
}
1033
/**
 * Get parser options suitable for rendering and caching the article
 *
 * @param IContextSource|User|string $context One of the following:
 *  - IContextSource: Use the User and the Language of the provided
 *    context
 *  - User: Use the provided User object and $wgLang for the language,
 *    so use an IContextSource object if possible.
 *  - 'canonical': Canonical options (anonymous user with default
 *    preferences and content language).
 *
 * @throws MWException If $context is none of the above
 * @return ParserOptions Options with limit reporting set from
 *   $wgEnableParserLimitReporting and tidy enabled
 */
public function makeParserOptions( $context ) {
	global $wgContLang, $wgEnableParserLimitReporting;

	if ( $context instanceof IContextSource ) {
		$options = ParserOptions::newFromContext( $context );
	} elseif ( $context instanceof User ) { // settings per user (even anons)
		$options = ParserOptions::newFromUser( $context );
	} elseif ( $context === 'canonical' ) { // canonical settings
		$options = ParserOptions::newFromUserAndLang( new User(), $wgContLang );
	} else {
		// Interpolating an arbitrary value is not safe: an array raises an
		// "Array to string conversion" notice and an object without
		// __toString() triggers a fatal error instead of the documented
		// exception. Describe such values by class or type instead; scalar
		// values keep producing exactly the same message as before.
		if ( is_object( $context ) ) {
			$described = get_class( $context );
		} elseif ( is_scalar( $context ) ) {
			$described = (string)$context;
		} else {
			$described = gettype( $context );
		}

		throw new MWException( "Bad context for parser options: $described" );
	}

	$options->enableLimitReport( $wgEnableParserLimitReporting ); // show inclusion/loop reports
	$options->setTidy( true ); // fix bad HTML

	return $options;
}
1066
/**
 * Tells whether this content model supports caching through the
 * ParserCache mechanism. See WikiPage::shouldCheckParserCache().
 *
 * This base implementation does not support it.
 *
 * @since 1.21
 *
 * @return bool Always false.
 */
public function isParserCacheSupported() {
	return false;
}
1078
/**
 * Tells whether this content model supports sections.
 * This default implementation returns false.
 *
 * Content models that return true here should also implement
 * Content::getSection, Content::replaceSection, etc. to handle sections.
 *
 * @return bool Always false.
 */
public function supportsSections() {
	return false;
}
1091
/**
 * Tells whether this content model supports categories.
 * This default implementation returns true.
 *
 * @return bool Always true.
 */
public function supportsCategories() {
	return true;
}
1101
/**
 * Tells whether this content model supports redirects.
 * This default implementation returns false.
 *
 * Content models that return true here should also implement
 * ContentHandler::makeRedirectContent to return a Content object.
 *
 * @return bool Always false.
 */
public function supportsRedirects() {
	return false;
}
1114
/**
 * Tells whether this content model supports direct editing, such as via EditPage.
 *
 * @return bool Default is false, and true for TextContent and its derivatives.
 */
public function supportsDirectEditing() {
	return false;
}
1123
/**
 * Tells whether this content model supports direct editing via ApiEditPage.
 *
 * This implementation simply defers to supportsDirectEditing().
 *
 * @return bool The value of supportsDirectEditing(), which defaults to false
 *   and is true for TextContent and derivatives.
 */
public function supportsDirectApiEditing() {
	return $this->supportsDirectEditing();
}
1132
/**
 * Run a legacy hook that works on text instead of Content objects.
 *
 * Every Content object found in $args is replaced by its serialized text
 * before the hook is run. Afterwards each of those slots is set back to a
 * Content object: the original one if the text is unchanged, or a new one
 * unserialized from the modified text via the original's content handler.
 *
 * $args is received by value, so these replacements are only visible to
 * the caller through elements that were passed as references.
 *
 * @param string $event Event name
 * @param array $args Parameters passed to hook functions
 * @param string|null $deprecatedVersion Forwarded to Hooks::run(); emit a
 *   deprecation notice when the hook is run for the provided version
 *
 * @return bool True if no handler aborted the hook
 */
public static function runLegacyHooks( $event, $args = [],
	$deprecatedVersion = null
) {
	if ( !Hooks::isRegistered( $event ) ) {
		// No handler registered, so there is nothing to convert or run
		return true;
	}

	// Swap Content objects for their serialized text, remembering both
	$originals = [];
	$serializedTexts = [];

	foreach ( $args as $key => $arg ) {
		if ( !( $arg instanceof Content ) ) {
			continue;
		}

		/* @var Content $arg */
		$originals[$key] = $arg;

		$serialized = $arg->serialize();
		$serializedTexts[$key] = $serialized;
		$args[$key] = $serialized;
	}

	// Run the hook functions on the text-based arguments
	$result = Hooks::run( $event, $args, $deprecatedVersion );

	// Put Content objects back, rebuilding those whose text was changed
	foreach ( $serializedTexts as $key => $before ) {
		/* @var Content $restored */
		$after = $args[$key];
		$restored = $originals[$key];

		if ( $after !== $before ) {
			$restored = $restored->getContentHandler()->unserializeContent( $after );
		}

		$args[$key] = $restored;
	}

	return $result;
}
1191
/**
 * Get the field definitions this handler contributes to the search index.
 *
 * Provides 'category' (text) plus 'external_link', 'outgoing_link' and
 * 'template' (keyword); 'category' and 'template' are flagged for case folding.
 *
 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
 *   field mappings here. (see T142670 and T143409)
 *
 * @param SearchEngine $engine
 * @return SearchIndexField[] List of fields this content handler can provide.
 * @since 1.28
 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	$category = $engine->makeSearchFieldMapping(
		'category',
		SearchIndexField::INDEX_TYPE_TEXT
	);
	$category->setFlag( SearchIndexField::FLAG_CASEFOLD );

	$externalLink = $engine->makeSearchFieldMapping(
		'external_link',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);

	$outgoingLink = $engine->makeSearchFieldMapping(
		'outgoing_link',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);

	$template = $engine->makeSearchFieldMapping(
		'template',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);
	$template->setFlag( SearchIndexField::FLAG_CASEFOLD );

	return [
		'category' => $category,
		'external_link' => $externalLink,
		'outgoing_link' => $outgoingLink,
		'template' => $template,
	];
}
1229
/**
 * Add a new field definition to the given array.
 *
 * The array is modified in place (it is taken by reference) and also returned.
 *
 * @param SearchIndexField[] &$fields Field definitions to extend
 * @param SearchEngine $engine Engine used to create the field mapping
 * @param string $name Field name, also used as the array key
 * @param int $type Field type, passed on to the engine
 * @return SearchIndexField[] new field defs
 * @since 1.28
 */
protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
	$mapping = $engine->makeSearchFieldMapping( $name, $type );
	$fields[$name] = $mapping;

	return $fields;
}
1243
/**
 * Return the fields to be indexed by the search engine as the
 * representation of this document.
 *
 * When the page has content, the category, external_link, outgoing_link and
 * template fields are extracted from the parser output, and text,
 * source_text and text_bytes are taken from the content. The
 * SearchDataForIndex hook is run in every case.
 *
 * Overriding class should call parent function or take care of calling
 * the SearchDataForIndex hook.
 *
 * @param WikiPage $page Page to index
 * @param ParserOutput $output
 * @param SearchEngine $engine Search engine for which we are indexing
 * @return array Map of name=>value for fields
 * @since 1.28
 */
public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
	SearchEngine $engine ) {
	$content = $page->getContent();
	$fieldData = [];

	if ( $content ) {
		$extractor = new ParserOutputSearchDataExtractor();

		$fieldData = [
			'category' => $extractor->getCategories( $output ),
			'external_link' => $extractor->getExternalLinks( $output ),
			'outgoing_link' => $extractor->getOutgoingLinks( $output ),
			'template' => $extractor->getTemplates( $output ),
		];

		// The same text feeds both the 'text' and the 'source_text' field
		$indexText = $content->getTextForSearchIndex();

		$fieldData['text'] = $indexText;
		$fieldData['source_text'] = $indexText;
		$fieldData['text_bytes'] = $content->getSize();
	}

	// Handlers receive the field data by reference
	Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

	return $fieldData;
}
1278
/**
 * Produce page output suitable for indexing.
 *
 * Uses canonical parser options. If a cache is given it is consulted first;
 * on a miss the page content is parsed and the result is saved to the cache.
 *
 * Specific content handlers may override it if they need different content handling.
 *
 * @param WikiPage $page
 * @param ParserCache|null $cache Optional cache to read from and write to
 * @return ParserOutput
 */
public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
	$options = $page->makeParserOptions( 'canonical' );
	$revisionId = $page->getRevision()->getId();

	// Without a cache there is nothing to look up
	$output = $cache ? $cache->get( $page, $options ) : null;

	if ( !$output ) {
		// Nothing usable from the cache, so parse the content now
		$output = $page->getContent()->getParserOutput(
			$page->getTitle(),
			$revisionId,
			$options
		);

		if ( $cache ) {
			$cache->save( $output, $page, $options );
		}
	}

	return $output;
}
1303
1304 }