Fix 'Tags' padding to keep it farther from the edge and document the source of the...
[lhc/web/wiklou.git] / includes / content / ContentHandler.php
1 <?php
2
3 use MediaWiki\Search\ParserOutputSearchDataExtractor;
4
5 /**
6 * Base class for content handling.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @since 1.21
24 *
25 * @file
26 * @ingroup Content
27 *
28 * @author Daniel Kinzler
29 */
30 /**
31 * A content handler knows how to deal with a specific type of content on a wiki
32 * page. Content is stored in the database in a serialized form (using a
33 * serialization format a.k.a. MIME type) and is unserialized into its native
34 * PHP representation (the content model), which is wrapped in an instance of
35 * the appropriate subclass of Content.
36 *
37 * ContentHandler instances are stateless singletons that serve, among other
38 * things, as a factory for Content objects. Generally, there is one subclass
39 * of ContentHandler and one subclass of Content for every type of content model.
40 *
41 * Some content types have a flat model, that is, their native representation
42 * is the same as their serialized form. Examples would be JavaScript and CSS
43 * code. As of now, this also applies to wikitext (MediaWiki's default content
44 * type), but wikitext content may be represented by a DOM or AST structure in
45 * the future.
46 *
47 * @ingroup Content
48 */
49 abstract class ContentHandler {
/**
 * Backwards-compatibility helper that extracts flat text from a Content
 * object, for callers that are not yet able to handle Content objects.
 *
 * Behavior by input:
 * - null: the empty string is returned.
 * - TextContent: the result of $content->getNativeData() is returned.
 * - anything else: the access is written to the 'ContentHandler' debug log
 *   and the outcome depends on the global $wgContentHandlerTextFallback:
 *   - 'fail': an MWException is thrown;
 *   - 'serialize': $content->serialize() is returned;
 *   - any other value (e.g. 'ignore'): null is returned.
 *
 * @since 1.21
 *
 * @param Content|null $content
 *
 * @throws MWException If the content is not an instance of TextContent and
 *   $wgContentHandlerTextFallback is set to 'fail'.
 * @return string|null Textual form of the content, if available.
 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	if ( $content === null ) {
		return '';
	}

	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

	// switch compares loosely (==), exactly like the if-chain it replaces.
	switch ( $wgContentHandlerTextFallback ) {
		case 'fail':
			throw new MWException(
				"Attempt to get text from Content with model " .
				$content->getModel()
			);
		case 'serialize':
			return $content->serialize();
		default:
			return null;
	}
}
105
/**
 * Builds a Content object from its textual (serialized) representation.
 *
 * The handler is looked up via getForModelID() for $modelId; when $modelId
 * is null, $title->getContentModel() is used instead. The text is then
 * passed to that handler's unserializeContent() together with $format.
 *
 * @since 1.21
 *
 * @param string $text The textual representation, will be
 *   unserialized to create the Content object
 * @param Title|null $title The title of the page this text belongs to.
 *   Required if $modelId is not provided.
 * @param string|null $modelId The model to deserialize to. If not provided,
 *   $title->getContentModel() is used.
 * @param string|null $format The format to use for deserialization. If not
 *   given, the model's default format is used.
 *
 * @throws MWException If neither a model ID nor a Title is given, if the model ID or
 *   format is not supported, or if the text can not be unserialized using the format.
 * @return Content A Content object representing the text.
 */
public static function makeContent( $text, Title $title = null,
	$modelId = null, $format = null
) {
	if ( $modelId === null && $title === null ) {
		throw new MWException( "Must provide a Title object or a content model ID." );
	}

	if ( $modelId === null ) {
		// Fall back to the model recorded for the page.
		$modelId = $title->getContentModel();
	}

	return self::getForModelID( $modelId )->unserializeContent( $text, $format );
}
143
/**
 * Returns the name of the default content model to be used for the page
 * with the given title.
 *
 * Note: There should rarely be need to call this method directly.
 * To determine the actual content model for a given page, use
 * Title::getContentModel().
 *
 * The model is determined as follows:
 * - The starting point is the per-namespace model reported by
 *   MWNamespace::getNamespaceContentModel().
 * - The hook ContentHandlerDefaultModelFor may replace that model. If the
 *   hook aborts (returns false) and a model is set, that model is final.
 * - Otherwise, pages in NS_MEDIAWIKI whose name ends in .css, .js or .json,
 *   and subpages in NS_USER whose name ends in .js, .css or .json, get the
 *   CSS, JavaScript or JSON model, respectively.
 * - Otherwise, a model other than wikitext that was set by the namespace
 *   configuration or the hook is used.
 *
 * If none of the above applies, the wikitext model is used.
 *
 * Note: this is used by, and may thus not use, Title::getContentModel()
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return string Default model name for the page given by $title
 */
public static function getDefaultModelFor( Title $title ) {
	// NOTE: this method must not rely on $title->getContentModel() directly or indirectly,
	// because it is used to initialize the mContentModel member.

	$ns = $title->getNamespace();
	$model = MWNamespace::getNamespaceContentModel( $ns );

	// An aborting hook handler gets the last word, provided it left a model behind.
	if ( !Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] ) && $model !== null ) {
		return $model;
	}

	$matches = null;
	$extension = null;

	// Interface code page, e.g. MediaWiki:Common.css?
	if ( NS_MEDIAWIKI == $ns && preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches ) ) {
		$extension = $matches[1];
	}

	// User subpage holding code, e.g. User:Foo/common.js?
	if ( $extension === null
		&& NS_USER == $ns
		&& preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches )
	) {
		$extension = $matches[1];
	}

	if ( $extension !== null ) {
		// Both patterns capture exactly one of these three extensions.
		$modelByExtension = [
			'js' => CONTENT_MODEL_JAVASCRIPT,
			'css' => CONTENT_MODEL_CSS,
			'json' => CONTENT_MODEL_JSON,
		];

		return $modelByExtension[$extension];
	}

	// Not a code page: keep any non-wikitext model chosen by configuration or the hook.
	if ( $model !== null && $model != CONTENT_MODEL_WIKITEXT ) {
		return $model;
	}

	return CONTENT_MODEL_WIKITEXT;
}
230
/**
 * Returns the ContentHandler singleton responsible for the given title,
 * i.e. the handler for the model reported by $title->getContentModel().
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return ContentHandler
 */
public static function getForTitle( Title $title ) {
	return self::getForModelID( $title->getContentModel() );
}
245
/**
 * Returns the ContentHandler singleton responsible for the given Content
 * object, i.e. the handler for the model reported by $content->getModel().
 *
 * @since 1.21
 *
 * @param Content $content
 *
 * @return ContentHandler
 */
public static function getForContent( Content $content ) {
	return self::getForModelID( $content->getModel() );
}
261
/**
 * @var ContentHandler[] Cache of ContentHandler instances, keyed by model ID
 */
protected static $handlers;

/**
 * Returns the ContentHandler singleton for the given model ID. Use the
 * CONTENT_MODEL_XXX constants to identify the desired content model.
 *
 * Handlers are configured in the global $wgContentHandlers array. Keys in
 * that array are model names, the values are either the name of the
 * appropriate subclass of ContentHandler, or a callable that is invoked
 * with the model ID and must return a ContentHandler.
 *
 * The handler is created on first use and cached in self::$handlers;
 * later calls for the same model ID return the cached instance.
 *
 * If no ContentHandler is defined for the desired $modelId, the
 * ContentHandler may be provided by the ContentHandlerForModelID hook.
 * If no ContentHandler can be determined, an exception is raised.
 *
 * @since 1.21
 *
 * @param string $modelId The ID of the content model for which to get a
 *   handler. Use CONTENT_MODEL_XXX constants.
 *
 * @throws MWException If the hook or the configured class/callback yields
 *   something that is not a ContentHandler.
 * @throws MWUnknownContentModelException If no handler is known for the model ID.
 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
 */
public static function getForModelID( $modelId ) {
	global $wgContentHandlers;

	if ( isset( self::$handlers[$modelId] ) ) {
		return self::$handlers[$modelId];
	}

	if ( empty( $wgContentHandlers[$modelId] ) ) {
		$handler = null;

		// Nothing configured: give hook handlers a chance to supply the handler.
		Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

		if ( $handler === null ) {
			throw new MWUnknownContentModelException( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
		}
	} else {
		$classOrCallback = $wgContentHandlers[$modelId];

		if ( is_callable( $classOrCallback ) ) {
			$handler = call_user_func( $classOrCallback, $modelId );
		} else {
			$handler = new $classOrCallback( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			// A callback may be a Closure, an invokable object or an array;
			// none of those can be interpolated into a string directly, so
			// build a printable description for the error message first.
			if ( is_string( $classOrCallback ) ) {
				$source = $classOrCallback;
			} elseif ( is_array( $classOrCallback ) ) {
				$owner = is_object( $classOrCallback[0] )
					? get_class( $classOrCallback[0] )
					: $classOrCallback[0];
				$source = $owner . '::' . $classOrCallback[1];
			} else {
				$source = get_class( $classOrCallback );
			}

			throw new MWException( "$source from \$wgContentHandlers is not " .
				"compatible with ContentHandler" );
		}
	}

	wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
		. ': ' . get_class( $handler ) );

	self::$handlers[$modelId] = $handler;

	return self::$handlers[$modelId];
}
334
/**
 * Empties the cache of ContentHandler instances, so that subsequent
 * getForModelID() calls create their handlers anew.
 */
public static function cleanupHandlersCache() {
	self::$handlers = [];
}
341
/**
 * Returns the localized name for a given content model.
 *
 * Model names are localized using system messages. Message keys
 * have the form content-model-$name. If no such message exists,
 * $name itself is returned.
 *
 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
 *   constant or returned by Revision::getContentModel().
 * @param Language|null $lang The language to parse the message in (since 1.26)
 *
 * @return string The content model's localized name.
 */
public static function getLocalizedName( $name, Language $lang = null ) {
	// Messages: content-model-wikitext, content-model-text,
	// content-model-javascript, content-model-css
	$msg = wfMessage( "content-model-$name" );

	if ( $lang ) {
		$msg->inLanguage( $lang );
	}

	if ( !$msg->exists() ) {
		return $name;
	}

	return $msg->plain();
}
367
/**
 * Returns the IDs of all known content models.
 *
 * The list starts out as the keys of the global $wgContentHandlers and is
 * then passed by reference to the GetContentModels hook, whose handlers
 * may modify it.
 *
 * @return array List of content model IDs
 */
public static function getContentModels() {
	global $wgContentHandlers;

	$modelIds = array_keys( $wgContentHandlers );
	Hooks::run( 'GetContentModels', [ &$modelIds ] );

	return $modelIds;
}
375
/**
 * Returns the serialization formats supported by the handlers of all
 * content models configured in the global $wgContentHandlers, with
 * duplicates removed.
 *
 * @return string[] List of serialization formats
 */
public static function getAllContentFormats() {
	global $wgContentHandlers;

	$allFormats = [];

	// Only the model IDs (array keys) matter here; the handler is resolved per ID.
	foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
		$supported = self::getForModelID( $modelId )->getSupportedFormats();
		$allFormats = array_merge( $allFormats, $supported );
	}

	return array_unique( $allFormats );
}
390
391 // ------------------------------------------------------------------------
392
/**
 * @var string ID of the content model this handler supports
 */
protected $mModelID;

/**
 * @var string[] Serialization formats supported by this handler
 */
protected $mSupportedFormats;

/**
 * Initializes the ContentHandler instance with its model ID and the list
 * of serialization formats it supports. Values for the parameters are
 * typically provided as literals by subclass's constructors.
 *
 * @param string $modelId (use CONTENT_MODEL_XXX constants).
 * @param string[] $formats List for supported serialization formats
 *   (typically as MIME types)
 */
public function __construct( $modelId, $formats ) {
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
416
/**
 * Turns a Content object of the type supported by this ContentHandler
 * into its serialized form. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @param Content $content The Content object to serialize
 * @param string|null $format The desired serialization format
 *
 * @return string Serialized form of the content
 */
abstract public function serializeContent( Content $content, $format = null );
428
/**
 * Export hook point for serialized content. This base implementation hands
 * the blob back unchanged and ignores $format. Subclasses may override this
 * to perform transformations such as conversion of legacy formats or
 * filtering of internal meta-data.
 *
 * @param string $blob The blob to be exported
 * @param string|null $format The blob's serialization format
 *
 * @return string
 */
public function exportTransform( $blob, $format = null ) {
	return $blob;
}
442
/**
 * Creates a Content object of the type supported by this ContentHandler
 * from its serialized form. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @param string $blob Serialized form of the content
 * @param string|null $format The format used for serialization
 *
 * @return Content The Content object created by deserializing $blob
 */
abstract public function unserializeContent( $blob, $format = null );
454
/**
 * Import hook point for serialized content. This base implementation hands
 * the blob back unchanged and ignores $format; subclasses may override it
 * to transform data blobs on import.
 *
 * @since 1.24
 *
 * @param string $blob
 * @param string|null $format
 *
 * @return string
 */
public function importTransform( $blob, $format = null ) {
	return $blob;
}
469
/**
 * Creates an empty Content object of the type supported by this
 * ContentHandler. Must be implemented by subclasses.
 *
 * @since 1.21
 *
 * @return Content
 */
abstract public function makeEmptyContent();
479
/**
 * Creates a new Content object that acts as a redirect to the given page,
 * or null if redirects are not supported by this content model.
 *
 * This base implementation ignores its arguments and always returns null.
 * Subclasses supporting redirects must override this method.
 *
 * Note that subclasses that override this method to return a Content object
 * should also override supportsRedirects() to return true.
 *
 * @since 1.21
 *
 * @param Title $destination The page to redirect to.
 * @param string $text Text to include in the redirect, if possible.
 *
 * @return Content|null Always null in this base implementation.
 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	return null;
}
500
/**
 * Returns the ID of the content model this ContentHandler can handle, as
 * passed to the constructor. Use with the CONTENT_MODEL_XXX constants.
 *
 * @since 1.21
 *
 * @return string The model ID
 */
public function getModelID() {
	return $this->mModelID;
}
512
/**
 * Asserts that the given model ID is the one handled by this
 * ContentHandler. The comparison is strict (type and value).
 *
 * @since 1.21
 *
 * @param string $model_id The model to check
 *
 * @throws MWException If the model ID is not the ID of the content model supported by this
 *   ContentHandler.
 */
protected function checkModelID( $model_id ) {
	if ( $model_id === $this->mModelID ) {
		return;
	}

	throw new MWException( "Bad content model: expected {$this->mModelID} but got $model_id." );
}
528
/**
 * Returns the list of serialization formats supported by the
 * serializeContent() and unserializeContent() methods of this
 * ContentHandler, as passed to the constructor.
 *
 * @since 1.21
 *
 * @return string[] List of serialization formats as MIME type like strings
 */
public function getSupportedFormats() {
	return $this->mSupportedFormats;
}
541
/**
 * Returns the format used for serialization/deserialization by default by
 * this ContentHandler.
 *
 * This base implementation returns the element at index 0 of the array
 * of formats that was passed to the constructor.
 *
 * @since 1.21
 *
 * @return string The name of the default serialization format as a MIME type
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
556
/**
 * Returns true if $format is a serialization format supported by this
 * ContentHandler, and false otherwise.
 *
 * Note that if $format is null (or any other empty value), this method
 * always returns true, because that means "use the default format".
 *
 * Formats are matched strictly against the list passed to the constructor,
 * so values such as boolean true are not mistaken for a supported format
 * through PHP's loose comparison rules.
 *
 * @since 1.21
 *
 * @param string|null $format The serialization format to check
 *
 * @return bool
 */
public function isSupportedFormat( $format ) {
	if ( !$format ) {
		return true; // this means "use the default"
	}

	return in_array( $format, $this->mSupportedFormats, true );
}
577
/**
 * Asserts that a format provided as a parameter is actually supported,
 * as decided by isSupportedFormat().
 *
 * @param string $format The serialization format to check
 *
 * @throws MWException If the format is not supported by this content handler.
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	$modelId = $this->getModelID();

	throw new MWException( "Format $format is not supported for content model " . $modelId );
}
593
/**
 * Returns overrides for action handlers.
 * Classes listed here will be used instead of the default one when
 * (and only when) $wgActions[$action] === true. This allows subclasses
 * to override the default action handlers.
 *
 * This base implementation defines no overrides.
 *
 * @since 1.21
 *
 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
 *   either the full qualified class name of an Action class, a callable taking ( Page $page,
 *   IContextSource $context = null ) as parameters and returning an Action object, or an actual
 *   Action object. An empty array in this default implementation.
 *
 * @see Action::factory
 */
public function getActionOverrides() {
	$overrides = [];

	return $overrides;
}
612
/**
 * Factory for creating an appropriate DifferenceEngine for this content model.
 *
 * The GetDifferenceEngine hook runs first. If a handler aborts the hook,
 * whatever it stored in the by-reference engine argument is returned as is.
 * Otherwise an instance of the class named by getDiffEngineClass() is created.
 *
 * @since 1.21
 *
 * @param IContextSource $context Context to use, anything else will be ignored.
 * @param int $old Revision ID we want to show and diff with.
 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
 *
 * @return DifferenceEngine
 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
	$rcid = 0, // FIXME: Deprecated, no longer used
	$refreshCache = false, $unhide = false
) {
	$engineFromHook = null;

	// $rcid is not part of the hook signature; it only goes to the constructor below.
	$proceed = Hooks::run(
		'GetDifferenceEngine',
		[ $context, $old, $new, $refreshCache, $unhide, &$engineFromHook ]
	);

	if ( $proceed ) {
		$engineClass = $this->getDiffEngineClass();

		return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
	}

	return $engineFromHook;
}
641
/**
 * Get the language in which the content of the given page is written.
 *
 * This default implementation starts from $wgContLang. For pages in the
 * MediaWiki namespace, the language is instead derived from the page name
 * via MessageCache::figureMessage(). The PageContentLanguage hook may then
 * replace the language, and the result is normalized with wfGetLangObj().
 *
 * Note that the pages language is not cacheable, since it may in some
 * cases depend on user settings.
 *
 * Also note that the page language may or may not depend on the actual content of the page,
 * that is, this method may load the content in order to determine the language.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *
 * @return Language The page's language
 */
public function getPageLanguage( Title $title, Content $content = null ) {
	global $wgContLang, $wgLang;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Parse mediawiki messages with correct target language;
		// the language code is the second element of the returned pair.
		$messageParts = MessageCache::singleton()->figureMessage( $title->getText() );
		$pageLang = Language::factory( $messageParts[1] );
	} else {
		$pageLang = $wgContLang;
	}

	Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

	return wfGetLangObj( $pageLang );
}
675
/**
 * Get the language in which the content of this page is written when
 * viewed by user.
 *
 * This default implementation returns $this->getPageLanguage( $title, $content ),
 * except that outside the MediaWiki namespace a preferred variant that differs
 * from the page language's code takes its place.
 *
 * Note that the pages view language is not cacheable, since it depends on user settings.
 *
 * Also note that the page language may or may not depend on the actual content of the page,
 * that is, this method may load the content in order to determine the language.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *
 * @return Language The page's language for viewing
 */
public function getPageViewLanguage( Title $title, Content $content = null ) {
	$pageLang = $this->getPageLanguage( $title, $content );

	// Interface messages are never shown in a variant.
	if ( $title->getNamespace() === NS_MEDIAWIKI ) {
		return $pageLang;
	}

	// If the user chooses a variant, the content is actually
	// in a language whose code is the variant code.
	$variant = $pageLang->getPreferredVariant();

	return $pageLang->getCode() === $variant
		? $pageLang
		: Language::factory( $variant );
}
710
/**
 * Determines whether the content type handled by this ContentHandler
 * can be used on the given page.
 *
 * This default implementation answers true unless a hook handler says otherwise.
 * Subclasses may override this to restrict the use of this content model to specific locations,
 * typically based on the namespace or some other aspect of the title, such as a special suffix
 * (e.g. ".svg" for SVG content).
 *
 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
 * content model can be used where.
 *
 * @param Title $title The page's title.
 *
 * @return bool True if content of this kind can be used on the given page, false otherwise.
 */
public function canBeUsedOn( Title $title ) {
	$allowed = true;
	$hookArgs = [ $this->getModelID(), $title, &$allowed ];

	Hooks::run( 'ContentModelCanBeUsedOn', $hookArgs );

	return $allowed;
}
734
/**
 * Returns the name of the diff engine class to use; createDifferenceEngine()
 * instantiates it. This base implementation names DifferenceEngine.
 *
 * @since 1.21
 *
 * @return string
 */
protected function getDiffEngineClass() {
	return DifferenceEngine::class;
}
745
/**
 * Attempts to merge differences between three versions. Returns a new
 * Content object for a clean merge and false for failure or a conflict.
 *
 * This base implementation performs no merge and always returns false.
 *
 * @since 1.21
 *
 * @param Content $oldContent The page's previous content.
 * @param Content $myContent One of the page's conflicting contents.
 * @param Content $yourContent One of the page's conflicting contents.
 *
 * @return Content|bool Always false in this base implementation.
 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	return false;
}
763
/**
 * Classifies the given edit and returns a string key for the kind of
 * change, or null if none of the known kinds applies.
 *
 * The checks run in this order, and the first match wins:
 * - 'blank': non-empty old content replaced by content of size 0
 * - 'new-redirect': the new content is a redirect, the old one was not
 * - 'changed-redirect-target': both are redirects, to different targets
 *   or fragments
 * - 'removed-redirect': the old content was a redirect, the new one is not
 * - 'newblank' / 'newpage': EDIT_NEW is set in $flags and new content exists
 * - 'replace': the old content is more than ten times the size of the new
 * - 'contentmodelchange': old and new content use different models
 *
 * @since 1.31
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string|null String key representing type of change, or null.
 */
private function getChangeType(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$hasOld = $oldContent !== null;
	$hasNew = $newContent !== null;

	$oldTarget = $hasOld ? $oldContent->getRedirectTarget() : null;
	$newTarget = $hasNew ? $newContent->getRedirectTarget() : null;

	// Blanking of a page
	if ( $hasOld && $oldContent->getSize() > 0 &&
		$hasNew && $newContent->getSize() === 0
	) {
		return 'blank';
	}

	// Redirects
	if ( !$newTarget ) {
		if ( $oldTarget ) {
			// Changing an existing redirect into a non-redirect
			return 'removed-redirect';
		}
	} elseif ( !$oldTarget ) {
		// New redirect page (by creating new page or by changing content page)
		return 'new-redirect';
	} elseif ( !$newTarget->equals( $oldTarget ) ||
		$oldTarget->getFragment() !== $newTarget->getFragment()
	) {
		// Redirect target changed
		return 'changed-redirect-target';
	}

	// New page created
	if ( ( $flags & EDIT_NEW ) && $hasNew ) {
		return $newContent->getSize() === 0 ? 'newblank' : 'newpage';
	}

	if ( $hasOld && $hasNew ) {
		// Removing more than 90% of the page
		if ( $oldContent->getSize() > 10 * $newContent->getSize() ) {
			return 'replace';
		}

		// Content model changed
		if ( $oldContent->getModel() !== $newContent->getModel() ) {
			return 'contentmodelchange';
		}
	}

	return null;
}
830
/**
 * Return an applicable auto-summary if one exists for the given edit.
 *
 * The kind of edit is determined by getChangeType(). Summaries that quote
 * page text truncate it so that the message text, any redirect target
 * names and the quoted text together stay within a fixed budget (250 bytes
 * for redirect summaries, 200 bytes otherwise). Change types without a
 * summary message yield the empty string.
 *
 * @since 1.21
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string An appropriate auto-summary, or an empty string.
 */
public function getAutosummary(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$changeType = $this->getChangeType( $oldContent, $newContent, $flags );

	// There's no applicable auto-summary for our case, so our auto-summary is empty.
	if ( !$changeType ) {
		return '';
	}

	// Decide what kind of auto-summary is needed.
	switch ( $changeType ) {
		case 'new-redirect':
			$newTarget = $newContent->getRedirectTarget();
			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
				- strlen( $newTarget->getFullText() )
			);

			return wfMessage( 'autoredircomment', $newTarget->getFullText() )
				->plaintextParams( $truncatedtext )->inContentLanguage()->text();
		case 'changed-redirect-target':
			$oldTarget = $oldContent->getRedirectTarget();
			$newTarget = $newContent->getRedirectTarget();

			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autosumm-changed-redirect-target' )
					->inContentLanguage()->text() )
				- strlen( $oldTarget->getFullText() )
				- strlen( $newTarget->getFullText() )
			);

			return wfMessage( 'autosumm-changed-redirect-target',
					$oldTarget->getFullText(),
					$newTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		case 'removed-redirect':
			$oldTarget = $oldContent->getRedirectTarget();

			// getChangeType() reports 'removed-redirect' whenever the old content
			// was a redirect and there is no new redirect target, which includes
			// the case of no new content at all. There is no text to quote then.
			if ( $newContent !== null ) {
				$truncatedtext = $newContent->getTextForSummary(
					250
					- strlen( wfMessage( 'autosumm-removed-redirect' )
						->inContentLanguage()->text() )
					- strlen( $oldTarget->getFullText() ) );
			} else {
				$truncatedtext = '';
			}

			return wfMessage( 'autosumm-removed-redirect', $oldTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		case 'newpage':
			// If they're making a new article, give its text, truncated, in the summary.
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		case 'blank':
			return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
		case 'replace':
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		case 'newblank':
			return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
		default:
			// Change types without a summary message, e.g. 'contentmodelchange'.
			return '';
	}
}
913
/**
 * Return an applicable tag if one exists for the given edit or return null.
 *
 * The tag name is the key returned by getChangeType() prefixed with 'mw-'.
 * It is only returned if it appears in ChangeTags::getSoftwareTags().
 *
 * @since 1.31
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string|null An appropriate tag, or null.
 */
public function getChangeTag(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$changeType = $this->getChangeType( $oldContent, $newContent, $flags );

	// There's no applicable tag for this change.
	if ( !$changeType ) {
		return null;
	}

	// Core tags use the change type keys, prefixed with the pseudo namespace 'mw-'.
	$candidate = 'mw-' . $changeType;

	// Not all change types are tagged, so we check against the list of defined tags.
	$isDefined = in_array( $candidate, ChangeTags::getSoftwareTags() );

	return $isDefined ? $candidate : null;
}
949
/**
 * Auto-generates a deletion reason
 *
 * The reason is built from one of the messages 'exbeforeblank',
 * 'excontentauthor' or 'excontent' (in the content language), with an
 * excerpt of the page content substituted for the '$1' placeholder.
 *
 * @since 1.21
 *
 * @param Title $title The page's title
 * @param bool &$hasHistory Whether the page has a history. Only assigned
 *   once the revision query has run; left untouched on the early
 *   "return false" paths.
 *
 * @return mixed String containing deletion reason or empty string, or
 *    boolean false if no revision could be found
 *
 * @todo &$hasHistory is extremely ugly, it's here because
 * WikiPage::getAutoDeleteReason() and Article::generateReason()
 * have it / want it.
 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbr = wfGetDB( DB_REPLICA );

	// Get the last revision
	$rev = Revision::newFromTitle( $title );

	if ( is_null( $rev ) ) {
		return false;
	}

	// Get the article's contents
	$content = $rev->getContent();
	$blank = false;

	// If the page is blank, use the text from the previous revision,
	// which can only be blank if there's a move/import/protect dummy
	// revision involved
	if ( !$content || $content->isEmpty() ) {
		$prev = $rev->getPrevious();

		if ( $prev ) {
			$rev = $prev;
			$content = $rev->getContent();
			$blank = true;
		}
	}

	$this->checkModelID( $rev->getContentModel() );

	// Find out if there was only one contributor.
	// At most 20 revision rows are scanned; rows whose user is
	// revision-deleted are excluded by the rev_deleted condition.
	$revQuery = Revision::getQueryInfo();
	$res = $dbr->select(
		$revQuery['tables'],
		[ 'rev_user_text' => $revQuery['fields']['rev_user_text'] ],
		[
			'rev_page' => $title->getArticleID(),
			$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
		],
		__METHOD__,
		[ 'LIMIT' => 20 ],
		$revQuery['joins']
	);

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );
	$row = $dbr->fetchObject( $res );

	if ( $row ) { // $row is false if the only contributor is hidden
		$onlyAuthor = $row->rev_user_text;
		// Try to find a second contributor
		foreach ( $res as $row ) {
			// T24999. Strict comparison: user names are strings, and a loose
			// comparison would treat distinct numeric-looking names
			// (e.g. "1e3" and "1000") as the same contributor.
			if ( $row->rev_user_text !== $onlyAuthor ) {
				$onlyAuthor = false;
				break;
			}
		}
	} else {
		$onlyAuthor = false;
	}

	// Generate the summary with a '$1' placeholder
	if ( $blank ) {
		// The current revision is blank, so the excerpt is taken from the
		// revision before it (which may itself be empty).
		$reason = wfMessage( 'exbeforeblank', '$1' )->inContentLanguage()->text();
	} elseif ( $onlyAuthor || $onlyAuthor === '0' ) {
		// The user name "0" is falsy in PHP but still names a contributor.
		$reason = wfMessage(
			'excontentauthor',
			'$1',
			$onlyAuthor
		)->inContentLanguage()->text();
	} else {
		$reason = wfMessage( 'excontent', '$1' )->inContentLanguage()->text();
	}

	if ( $reason === '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$text = $content ? $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) ) : '';

	// Now replace the '$1' placeholder
	$reason = str_replace( '$1', $text, $reason );

	return $reason;
}
1060
/**
 * Get the Content object that needs to be saved in order to undo all revisions
 * between $undo and $undoafter. Revisions must belong to the same page,
 * must exist and must not be deleted.
 *
 * When the current content equals the content of $undo, the content of
 * $undoafter is returned as-is; otherwise the result of $this->merge3()
 * is returned.
 *
 * @since 1.21
 *
 * @param Revision $current The current text
 * @param Revision $undo The revision to undo
 * @param Revision $undoafter Must be an earlier revision than $undo
 *
 * @return mixed Content on success, false on failure
 */
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
	$currentContent = $current->getContent();
	if ( !$currentContent ) {
		// No page
		return false;
	}

	$undoContent = $undo->getContent();
	$undoAfterContent = $undoafter->getContent();
	if ( !( $undoContent && $undoAfterContent ) ) {
		// No content to undo
		return false;
	}

	try {
		$this->checkModelID( $currentContent->getModel() );
		$this->checkModelID( $undoContent->getModel() );

		// Reverting a content model change is fine when undoing the most
		// recent revision; for a revision in the middle it would be
		// confusing, so the model of $undoafter is checked as well.
		$undoingLatest = ( $current->getId() === $undo->getId() );
		if ( !$undoingLatest ) {
			$this->checkModelID( $undoAfterContent->getModel() );
		}
	} catch ( MWException $e ) {
		// The revisions have different content models: give up.
		return false;
	}

	// A straight revert needs no merge.
	return $currentContent->equals( $undoContent )
		? $undoAfterContent
		: $this->merge3( $undoContent, $undoAfterContent, $currentContent );
}
1113
/**
 * Get parser options suitable for rendering and caching the article
 *
 * @param IContextSource|User|string $context One of the following:
 *        - IContextSource: Use the User and the Language of the provided
 *                                            context
 *        - User: Use the provided User object and $wgLang for the language,
 *                                            so use an IContextSource object if possible.
 *        - 'canonical': Canonical options (anonymous user with default
 *                                            preferences and content language).
 *
 * @throws MWException If $context is none of the above
 * @return ParserOptions
 */
public function makeParserOptions( $context ) {
	global $wgContLang;

	if ( $context instanceof IContextSource ) {
		$user = $context->getUser();
		$lang = $context->getLanguage();
	} elseif ( $context instanceof User ) { // settings per user (even anons)
		$user = $context;
		$lang = null;
	} elseif ( $context === 'canonical' ) { // canonical settings
		$user = new User;
		$lang = $wgContLang;
	} else {
		// Describe the bad value without interpolating it directly:
		// arrays and objects lacking __toString cannot be converted to a
		// string and would raise a conversion error instead of the
		// documented MWException.
		if ( is_scalar( $context ) ) {
			$described = (string)$context;
		} elseif ( is_object( $context ) ) {
			$described = get_class( $context );
		} else {
			$described = gettype( $context );
		}

		throw new MWException( "Bad context for parser options: $described" );
	}

	return ParserOptions::newCanonical( $user, $lang );
}
1146
/**
 * Tells whether this content model supports caching through the
 * ParserCache mechanism. See WikiPage::shouldCheckParserCache().
 *
 * This base implementation reports no support.
 *
 * @since 1.21
 *
 * @return bool Always false.
 */
public function isParserCacheSupported() {
	return false;
}
1158
/**
 * Tells whether this content model supports sections.
 * This default implementation reports no support.
 *
 * Content models that return true here should also implement
 * Content::getSection, Content::replaceSection, etc. to handle sections.
 *
 * @return bool Always false.
 */
public function supportsSections() {
	return false;
}
1171
/**
 * Tells whether this content model supports categories.
 * This default implementation reports support.
 *
 * @return bool Always true.
 */
public function supportsCategories() {
	return true;
}
1181
/**
 * Tells whether this content model supports redirects.
 * This default implementation reports no support.
 *
 * Content models that return true here should also implement
 * ContentHandler::makeRedirectContent to return a Content object.
 *
 * @return bool Always false.
 */
public function supportsRedirects() {
	return false;
}
1194
/**
 * Tells whether this content model supports direct editing, such as via EditPage.
 *
 * @return bool Default is false, and true for TextContent and its derivatives.
 */
public function supportsDirectEditing() {
	return false;
}
1203
/**
 * Tells whether this content model supports direct editing via ApiEditPage.
 *
 * This implementation simply forwards to supportsDirectEditing().
 *
 * @return bool Default is false, and true for TextContent and derivatives.
 */
public function supportsDirectApiEditing() {
	return $this->supportsDirectEditing();
}
1212
/**
 * Get fields definition for search index
 *
 * Defines the fields 'category', 'external_link', 'outgoing_link',
 * 'template' and 'content_model'. 'category' is a text field, the others
 * are keyword fields; 'category' and 'template' carry the case-folding flag.
 *
 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
 *       field mappings here. (see T142670 and T143409)
 *
 * @param SearchEngine $engine
 * @return SearchIndexField[] List of fields this content handler can provide.
 * @since 1.28
 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	$keyword = SearchIndexField::INDEX_TYPE_KEYWORD;

	$category = $engine->makeSearchFieldMapping(
		'category',
		SearchIndexField::INDEX_TYPE_TEXT
	);
	$category->setFlag( SearchIndexField::FLAG_CASEFOLD );

	$externalLink = $engine->makeSearchFieldMapping( 'external_link', $keyword );
	$outgoingLink = $engine->makeSearchFieldMapping( 'outgoing_link', $keyword );

	$template = $engine->makeSearchFieldMapping( 'template', $keyword );
	$template->setFlag( SearchIndexField::FLAG_CASEFOLD );

	$contentModel = $engine->makeSearchFieldMapping( 'content_model', $keyword );

	return [
		'category' => $category,
		'external_link' => $externalLink,
		'outgoing_link' => $outgoingLink,
		'template' => $template,
		'content_model' => $contentModel,
	];
}
1253
/**
 * Add new field definition to array.
 *
 * The mapping is created by the search engine and stored in $fields under
 * $name; the array is modified in place and also returned.
 *
 * @param SearchIndexField[] &$fields
 * @param SearchEngine $engine
 * @param string $name
 * @param int $type
 * @return SearchIndexField[] new field defs
 * @since 1.28
 */
protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
	$mapping = $engine->makeSearchFieldMapping( $name, $type );
	$fields[$name] = $mapping;

	return $fields;
}
1267
/**
 * Return fields to be indexed by search engine
 * as representation of this document.
 *
 * When the page has content, link/category/template data is extracted from
 * the parser output and text data from the content itself. The
 * SearchDataForIndex hook is run in every case, even if the page has no
 * content.
 *
 * Overriding class should call parent function or take care of calling
 * the SearchDataForIndex hook.
 *
 * @param WikiPage $page Page to index
 * @param ParserOutput $output
 * @param SearchEngine $engine Search engine for which we are indexing
 * @return array Map of name=>value for fields
 * @since 1.28
 */
public function getDataForSearchIndex(
	WikiPage $page,
	ParserOutput $output,
	SearchEngine $engine
) {
	$fieldData = [];
	$content = $page->getContent();

	if ( $content ) {
		$extractor = new ParserOutputSearchDataExtractor();

		// Data derived from the parser output
		$fieldData = [
			'category' => $extractor->getCategories( $output ),
			'external_link' => $extractor->getExternalLinks( $output ),
			'outgoing_link' => $extractor->getOutgoingLinks( $output ),
			'template' => $extractor->getTemplates( $output ),
		];

		// Data derived from the content object; the same text is used
		// for both 'text' and 'source_text'.
		$indexText = $content->getTextForSearchIndex();
		$fieldData += [
			'text' => $indexText,
			'source_text' => $indexText,
			'text_bytes' => $content->getSize(),
			'content_model' => $content->getModel(),
		];
	}

	Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

	return $fieldData;
}
1306
/**
 * Produce page output suitable for indexing.
 *
 * If a parser cache is given and holds an entry for the page under
 * canonical parser options, that entry is returned. Otherwise the page
 * content is parsed and, when a cache is given, the result is saved to it.
 *
 * Specific content handlers may override it if they need different content handling.
 *
 * @param WikiPage $page
 * @param ParserCache|null $cache
 * @return ParserOutput
 */
public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
	$options = $page->makeParserOptions( 'canonical' );
	$revId = $page->getRevision()->getId();

	$cached = $cache ? $cache->get( $page, $options ) : null;
	if ( !empty( $cached ) ) {
		return $cached;
	}

	// Cache miss (or no cache supplied): parse the current content.
	$fresh = $page->getContent()->getParserOutput( $page->getTitle(), $revId, $options );
	if ( $cache ) {
		$cache->save( $fresh, $page, $options );
	}

	return $fresh;
}
1331
1332 }