/**
 * Thrown when a content object cannot be serialized or unserialized.
 *
 * Adds nothing to MWException; it exists so callers can catch this
 * failure specifically.
 */
class MWContentSerializationException extends MWException {
}
11 * A content handler knows how to deal with a specific type of content on a wiki page.
12 * Content is stored in the database in a serialized form (using a serialization format, a.k.a. MIME type)
13 * and is unserialized into its native PHP representation (the content model), which is wrapped in
14 * an instance of the appropriate subclass of Content.
16 * ContentHandler instances are stateless singletons that serve, among other things, as a factory for
17 * Content objects. Generally, there is one subclass of ContentHandler and one subclass of Content
18 * for every type of content model.
20 * Some content types have a flat model, that is, their native representation is the
21 * same as their serialized form. Examples would be JavaScript and CSS code. As of now,
22 * this also applies to wikitext (MediaWiki's default content type), but wikitext
23 * content may be represented by a DOM or AST structure in the future.
27 abstract class ContentHandler
{
30 * Convenience function for getting flat text from a Content object. This should only
31 * be used in the context of backwards compatibility with code that is not yet able
32 * to handle Content objects!
34 * If $content is null, this method returns the empty string.
36 * If $content is an instance of TextContent, this method returns the flat text as returned by $content->getNativeData().
38 * If $content is not a TextContent object, the behavior of this method depends on the global $wgContentHandlerTextFallback:
39 * * If $wgContentHandlerTextFallback is 'fail' and $content is not a TextContent object, an MWException is thrown.
40 * * If $wgContentHandlerTextFallback is 'serialize' and $content is not a TextContent object, $content->serialize()
41 * is called to get a string form of the content.
42 * * If $wgContentHandlerTextFallback is 'ignore' and $content is not a TextContent object, this method returns null.
43 * * otherwise, the behaviour is undefined.
48 * @param Content|null $content
49 * @return null|string the textual form of $content, if available
50 * @throws MWException if $content is not an instance of TextContent and $wgContentHandlerTextFallback was set to 'fail'.
// Best-effort conversion of a Content object to flat text, for callers that cannot handle Content yet.
52 public static function getContentText( Content
$content = null ) {
53 global $wgContentHandlerTextFallback;
// NOTE(review): the body of this null check is not visible in this listing; per the doc comment it yields ''.
55 if ( is_null( $content ) ) {
// Text-based content exposes its flat text directly.
59 if ( $content instanceof TextContent
) {
60 return $content->getNativeData();
// From here on $content is not a TextContent; the configured fallback mode decides what happens.
63 if ( $wgContentHandlerTextFallback == 'fail' ) {
64 throw new MWException( "Attempt to get text from Content with model " . $content->getModel() );
67 if ( $wgContentHandlerTextFallback == 'serialize' ) {
68 return $content->serialize();
/**
 * Builds a Content object from its textual (serialized) representation.
 *
 * The text is handed to the ContentHandler registered for $modelId, which
 * unserializes it using $format. When $modelId is null, the model reported
 * by $title->getContentModel() is used instead.
 *
 * @param string $text the serialized text to turn into a Content object
 * @param Title $title the page the text belongs to; consulted for the content model when $modelId is null
 * @param null|String $modelId the model to unserialize to, or null to use the title's model
 * @param null|String $format the serialization format, passed through unchanged to the handler
 *
 * @return Content the Content object produced by the handler
 * @throws MWException if no handler is known for the model (raised by getForModelID()); the handler
 *         may also throw if it cannot unserialize $text using $format
 */
public static function makeContent( $text, Title $title, $modelId = null, $format = null ) {
	$effectiveModel = is_null( $modelId ) ? $title->getContentModel() : $modelId;

	return self::getForModelID( $effectiveModel )->unserializeContent( $text, $format );
}
102 * Returns the name of the default content model to be used for the page with the given title.
104 * Note: There should rarely be need to call this method directly.
105 * To determine the actual content model for a given page, use Title::getContentModel().
107 * Which model is to be used per default for the page is determined based on several factors:
108 * * The global setting $wgNamespaceContentModels specifies a content model per namespace.
109 * * The hook ContentHandlerDefaultModelFor may be used to override the page's default model.
110 * * Pages in NS_MEDIAWIKI, and subpages in NS_USER, default to the JavaScript or CSS model if they end in .js or .css, respectively.
111 * * Pages in NS_MEDIAWIKI default to the wikitext model otherwise.
112 * * The hook TitleIsCssOrJsPage may be used to force a page to use the JavaScript or CSS model if it ends in .js or .css, respectively.
113 * * The hook TitleIsWikitextPage may be used to force a page to use the wikitext model.
115 * If none of the above applies, the wikitext model is used.
117 * Note: this is used by, and may thus not use, Title::getContentModel()
122 * @param Title $title
123 * @return null|string default model name for the page given by $title
// Decides the default content model for a page from its namespace, its title suffix and three hooks.
// NOTE(review): several lines of this method are not visible in this listing (gaps in the numbering).
125 public static function getDefaultModelFor( Title
$title ) {
126 global $wgNamespaceContentModels;
128 // NOTE: this method must not rely on $title->getContentModel() directly or indirectly,
129 // because it is used to initialize the mContentModel member.
131 $ns = $title->getNamespace();
// A per-namespace model configured in $wgNamespaceContentModels is the starting point.
137 if ( !empty( $wgNamespaceContentModels[ $ns ] ) ) {
138 $model = $wgNamespaceContentModels[ $ns ];
141 // hook can determine the default model
// The nested check only runs when wfRunHooks() returns a falsy value (presumably: a handler aborted the run).
142 if ( !wfRunHooks( 'ContentHandlerDefaultModelFor', array( $title, &$model ) ) ) {
143 if ( !is_null( $model ) ) {
148 // Could this page contain custom CSS or JavaScript, based on the title?
149 $isCssOrJsPage = NS_MEDIAWIKI
== $ns && preg_match( '!\.(css|js)$!u', $title->getText(), $m );
150 if ( $isCssOrJsPage ) {
154 // hook can force js/css
155 wfRunHooks( 'TitleIsCssOrJsPage', array( $title, &$isCssOrJsPage ) );
157 // Is this a .js or .css subpage of a user page?
// The pattern requires a '/' before the file name, so only subpages match.
158 $isJsCssSubpage = NS_USER
== $ns && !$isCssOrJsPage && preg_match( "/\\/.*\\.(js|css)$/", $title->getText(), $m );
159 if ( $isJsCssSubpage ) {
163 // is this wikitext, according to $wgNamespaceContentModels or the ContentHandlerDefaultModelFor hook?
164 $isWikitext = is_null( $model ) ||
$model == CONTENT_MODEL_WIKITEXT
;
// Script pages are never wikitext, whatever the configured model says.
165 $isWikitext = $isWikitext && !$isCssOrJsPage && !$isJsCssSubpage;
167 // hook can override $isWikitext
168 wfRunHooks( 'TitleIsWikitextPage', array( $title, &$isWikitext ) );
// NOTE(review): the construct selecting between the three returns below is not visible here.
170 if ( !$isWikitext ) {
173 return CONTENT_MODEL_JAVASCRIPT
;
175 return CONTENT_MODEL_CSS
;
// Neither script type: use the configured model, or plain text when none was set.
177 return is_null( $model ) ? CONTENT_MODEL_TEXT
: $model;
181 // we established that it must be wikitext
183 return CONTENT_MODEL_WIKITEXT
;
/**
 * Looks up the ContentHandler singleton responsible for a page.
 *
 * @param Title $title the page whose content model selects the handler
 * @return ContentHandler the handler registered for $title->getContentModel()
 */
public static function getForTitle( Title $title ) {
	return self::getForModelID( $title->getContentModel() );
}
/**
 * Looks up the ContentHandler singleton responsible for a Content object.
 *
 * @param Content $content the content whose model selects the handler
 * @return ContentHandler the handler registered for $content->getModel()
 */
public static function getForContent( Content $content ) {
	return self::getForModelID( $content->getModel() );
}
215 * returns the ContentHandler singleton for the given model id. Use the CONTENT_MODEL_XXX constants to
216 * identify the desired content model.
218 * ContentHandler singletons are taken from the global $wgContentHandlers array. Keys in that array are
219 * model names, the values are either ContentHandler singleton objects, or strings specifying the appropriate
220 * subclass of ContentHandler.
222 * If a class name is encountered when looking up the singleton for a given model name, the class is
223 * instantiated and the class name is replaced by the resulting singleton in $wgContentHandlers.
225 * If no ContentHandler is defined for the desired $modelId, the ContentHandler may be provided by
226 * a ContentHandlerForModelID hook. If no ContentHandler can be determined, an MWException is raised.
231 * @param $modelId int the id of the content model for which to get a handler. Use CONTENT_MODEL_XXX constants.
232 * @return ContentHandler the ContentHandler singleton for handling the model given by $modelId
233 * @throws MWException if no handler is known for $modelId.
// Resolves a model id to its ContentHandler singleton, instantiating and caching it on first use.
235 public static function getForModelID( $modelId ) {
236 global $wgContentHandlers;
// Nothing registered for this model: give the ContentHandlerForModelID hook a chance to supply a handler.
// NOTE(review): the initialisation of $handler is not visible in this listing.
238 if ( empty( $wgContentHandlers[$modelId] ) ) {
241 wfRunHooks( 'ContentHandlerForModelID', array( $modelId, &$handler ) );
243 if ( $handler ) { // NOTE: may be a string or an object, either is fine!
// Remember what the hook supplied so later lookups find it in the registry.
244 $wgContentHandlers[$modelId] = $handler;
246 throw new MWException( "No handler for model #$modelId registered in \$wgContentHandlers" );
// A string entry is a class name: instantiate it and replace the entry with the instance.
250 if ( is_string( $wgContentHandlers[$modelId] ) ) {
251 $class = $wgContentHandlers[$modelId];
252 $wgContentHandlers[$modelId] = new $class( $modelId );
255 return $wgContentHandlers[$modelId];
259 * Returns the appropriate mime type for a given content format,
260 * or null if no mime type is known for this format.
262 * Mime types can be registered in the global array $wgContentFormatMimeTypes.
265 * @param int $id the content format id, as given by a CONTENT_FORMAT_XXX constant
266 * or returned by Revision::getContentFormat().
268 * @return String|null the content format's mime type.
// Maps a content format id to its mime type via the global $wgContentFormatMimeTypes array.
270 public static function getContentFormatMimeType( $id ) {
271 global $wgContentFormatMimeTypes;
// Guard for ids without a registered mime type.
// NOTE(review): the guarded statement is not visible here; per the doc comment the result is null.
273 if ( !isset( $wgContentFormatMimeTypes[ $id ] ) ) {
277 return $wgContentFormatMimeTypes[ $id ];
281 * Returns the content format id for a given mime type,
282 * or null if no format id is known for this mime type.
284 * Mime types can be registered in the global array $wgContentFormatMimeTypes.
287 * @param String $mime the mime type
289 * @return int|null the format id, as defined by a CONTENT_FORMAT_XXX constant
// Reverse lookup: maps a mime type back to its content format id.
291 public static function getContentFormatID( $mime ) {
292 global $wgContentFormatMimeTypes;
// Reverse map (mime type => format id), built on the first call and reused afterwards.
// NOTE(review): entries added to $wgContentFormatMimeTypes after the first call are not picked up.
294 static $format_ids = null;
296 if ( $format_ids === null ) {
297 $format_ids = array_flip( $wgContentFormatMimeTypes );
// Guard for unknown mime types.
// NOTE(review): the guarded statement is not visible here; per the doc comment the result is null.
300 if ( !isset( $format_ids[ $mime ] ) ) {
304 return $format_ids[ $mime ];
/**
 * Returns the localized name of a content model, or null if the
 * corresponding system message is empty.
 *
 * Model names are localized using system messages whose keys have the
 * form content-model-$id.
 *
 * @param int $id the content model id, as given by a CONTENT_MODEL_XXX constant
 *        or returned by Revision::getContentModel().
 *
 * @return String|null the localized model name, or null if wfEmptyMsg() reports the message as empty
 */
public static function getContentModelName( $id ) {
	$messageKey = "content-model-$id";

	return wfEmptyMsg( $messageKey ) ? null : wfMsg( $messageKey );
}
327 // ----------------------------------------------------------------------------------------------------------
330 protected $mSupportedFormats;
/**
 * Sets up the handler with the id of the content model it serves and the
 * serialization formats it supports. Subclasses' constructors typically
 * pass both values as literals.
 *
 * @param int $modelId the content model id (use CONTENT_MODEL_XXX constants)
 * @param array $formats list of supported serialization formats (typically MIME types);
 *        the first entry is what getDefaultFormat() returns
 */
public function __construct( $modelId, $formats ) {
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
/**
 * Serializes a Content object of the type supported by this ContentHandler.
 *
 * @param Content $content the Content object to serialize
 * @param String|null $format the desired serialization format; null is the default
 * @return String serialized form of the content
 */
abstract public function serializeContent( Content $content, $format = null );
/**
 * Unserializes a Content object of the type supported by this ContentHandler.
 *
 * @param String $blob serialized form of the content
 * @param String|null $format the format that was used for serialization; null is the default
 * @return Content the Content object created by unserializing $blob
 */
abstract public function unserializeContent( $blob, $format = null );
/**
 * Creates an empty Content object of the type supported by this ContentHandler.
 *
 * @return Content
 */
abstract public function makeEmptyContent();
/**
 * Returns the id of the content model this ContentHandler was constructed for.
 * Compare against the CONTENT_MODEL_XXX constants.
 *
 * @return int the model id passed to the constructor
 */
public function getModelID() {
	return $this->mModelID;
}
/**
 * Throws an MWException if $model_id is not the id of the content model
 * supported by this ContentHandler.
 *
 * The comparison is strict (!==), so the value must match the stored model
 * id in type as well as value.
 *
 * @param int $model_id the model to check
 *
 * @throws MWException if $model_id differs from this handler's model id
 */
protected function checkModelID( $model_id ) {
	if ( $model_id === $this->mModelID ) {
		return;
	}

	// Resolve the localized names only on the failure path; they are needed for the message alone.
	$model_name = ContentHandler::getContentModelName( $model_id );
	$own_model_name = ContentHandler::getContentModelName( $this->mModelID );

	throw new MWException( "Bad content model: expected {$this->mModelID} ($own_model_name) but got $model_id ($model_name)." );
}
/**
 * Returns the serialization formats that serializeContent() and
 * unserializeContent() of this ContentHandler accept.
 *
 * @return array the formats passed to the constructor, as MIME type like strings
 */
public function getSupportedFormats() {
	return $this->mSupportedFormats;
}
/**
 * Returns the format this ContentHandler uses for serialization and
 * unserialization when none is specified.
 *
 * This default implementation returns the first element (index 0) of the
 * list of formats that was passed to the constructor.
 *
 * @return String the name of the default serialization format as a MIME type
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
436 * Returns true if $format is a serialization format supported by this ContentHandler,
437 * and false otherwise.
439 * Note that if $format is null, this method always returns true, because null
440 * means "use the default format".
444 * @param String $format the serialization format to check
// Tells whether $format is one of the formats this handler was constructed with.
447 public function isSupportedFormat( $format ) {
// NOTE(review): the condition guarding this early return is not visible in this listing;
// per the doc comment it covers a null $format.
450 return true; // this means "use the default"
// Loose (non-strict) membership test against the constructor-supplied format list.
453 return in_array( $format, $this->mSupportedFormats
);
/**
 * Throws an MWException unless isSupportedFormat( $format ) is true.
 * Convenient for validating a format received as a parameter.
 *
 * @param String $format the serialization format to check
 *
 * @throws MWException if the format is not supported by this handler
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException( "Format $format is not supported for content model " . $this->getModelID() );
}
471 * Returns if the content is consistent with the database, that is if saving it to the database would not violate any
472 * global constraints.
474 * Content needs to be valid using this method before it can be saved.
476 * This default implementation always returns true.
480 * @param \Content $content
// Consistency check applied to content before it is saved.
// NOTE(review): the body is not visible in this listing; the doc comment says this default implementation returns true.
484 public function isConsistentWithDatabase( Content
$content ) {
489 * Returns overrides for action handlers.
490 * Classes listed here will be used instead of the default one when
491 * (and only when) $wgActions[$action] === true. This allows subclasses
492 * to override the default action handlers.
// Lets a content model replace the default handler classes for actions.
// NOTE(review): the body is not visible in this listing — confirm the return shape (presumably action name => class).
498 public function getActionOverrides() {
/**
 * Factory creating an appropriate DifferenceEngine for this content model.
 *
 * The engine class is whatever getDiffEngineClass() names; all arguments are
 * forwarded to its constructor unchanged.
 *
 * @param $context IContextSource context to use, anything else will be ignored
 * @param $old Integer old ID we want to show and diff with.
 * @param $new int|String new ID, or either 'prev' or 'next'.
 * @param $rcid Integer purpose undocumented, FIXME (default 0)
 * @param $refreshCache boolean If set, refreshes the diff cache
 * @param $unhide boolean If set, allow viewing deleted revs
 *
 * @return DifferenceEngine
 * @throws MWException if the context title's content model is not the one this handler serves
 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0, $rcid = 0, #FIXME: use everywhere!
	$refreshCache = false, $unhide = false
) {
	// Refuse to build a diff engine for a page whose model this handler does not serve.
	$this->checkModelID( $context->getTitle()->getContentModel() );

	$engineClass = $this->getDiffEngineClass();

	return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
}
/**
 * Returns the name of the diff engine class that createDifferenceEngine()
 * instantiates. Subclasses can override this to supply their own engine.
 *
 * @return string the class name, 'DifferenceEngine' in this base implementation
 */
protected function getDiffEngineClass() {
	return 'DifferenceEngine';
}
538 * attempts to merge differences between three versions.
539 * Returns a new Content object for a clean merge and false for failure or a conflict.
541 * This default implementation always returns false.
545 * @param Content $oldContent
546 * @param Content $myContent
547 * @param Content $yourContent
549 * @return Content|Bool
// Base three-way merge; TextContentHandler::merge3() overrides it with a text-based merge.
// NOTE(review): the body is not visible in this listing; the doc comment says this default always returns false.
551 public function merge3( Content
$oldContent, Content
$myContent, Content
$yourContent ) {
556 * Return an applicable auto-summary if one exists for the given edit.
560 * @param $oldContent Content|null: the previous text of the page.
561 * @param $newContent Content|null: The submitted text of the page.
562 * @param $flags Int bit mask: a bit mask of flags submitted for the edit.
564 * @return string An appropriate auto-summary, or an empty string.
// Picks an automatic edit summary: redirect, new page, blanking or large removal, checked in that order.
// NOTE(review): several lines of this method are not visible in this listing (gaps in the numbering).
566 public function getAutosummary( Content
$oldContent = null, Content
$newContent = null, $flags ) {
569 // Decide what kind of auto-summary is needed.
571 // Redirect auto-summaries
// Redirect targets of the old and new content; null when the respective content object is absent.
578 $ot = !is_null( $oldContent ) ?
$oldContent->getRedirectTarget() : null;
579 $rt = !is_null( $newContent ) ?
$newContent->getRedirectTarget() : null;
// Applies when the new content has a redirect target and the old one had none, or a different target or fragment.
581 if ( is_object( $rt ) && ( !is_object( $ot ) ||
!$rt->equals( $ot ) ||
$ot->getFragment() != $rt->getFragment() ) ) {
// The excerpt budget shrinks by the byte length (strlen) of the message and of the target title.
583 $truncatedtext = $newContent->getTextForSummary(
585 - strlen( wfMsgForContent( 'autoredircomment' ) )
586 - strlen( $rt->getFullText() ) );
588 return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext );
591 // New page auto-summaries
// NOTE(review): $newContent defaults to null but is dereferenced here and below without a null check.
592 if ( $flags & EDIT_NEW
&& $newContent->getSize() > 0 ) {
593 // If they're making a new article, give its text, truncated, in the summary.
595 $truncatedtext = $newContent->getTextForSummary(
596 200 - strlen( wfMsgForContent( 'autosumm-new' ) ) );
598 return wfMsgForContent( 'autosumm-new', $truncatedtext );
601 // Blanking auto-summaries
602 if ( !empty( $oldContent ) && $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) {
603 return wfMsgForContent( 'autosumm-blank' );
604 } elseif ( !empty( $oldContent ) && $oldContent->getSize() > 10 * $newContent->getSize() && $newContent->getSize() < 500 ) {
605 // Removing more than 90% of the article
607 $truncatedtext = $newContent->getTextForSummary(
608 200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) );
610 return wfMsgForContent( 'autosumm-replace', $truncatedtext );
613 // If we reach this point, there's no applicable auto-summary for our case, so our
614 // auto-summary is empty.
620 * Auto-generates a deletion reason
624 * @param $title Title: the page's title
625 * @param &$hasHistory Boolean: whether the page has a history
626 * @return mixed String containing deletion reason or empty string, or boolean false
627 * if no revision occurred
629 * @XXX &$hasHistory is extremely ugly, it's here because WikiPage::getAutoDeleteReason() and Article::getReason() have it / want it.
// Builds a default deletion reason from the page's latest content and its recent contributors.
// NOTE(review): several lines of this method are not visible in this listing (gaps in the numbering).
631 public function getAutoDeleteReason( Title
$title, &$hasHistory ) {
632 $dbw = wfGetDB( DB_MASTER
);
634 // Get the last revision
635 $rev = Revision
::newFromTitle( $title );
637 if ( is_null( $rev ) ) {
641 // Get the article's contents
642 $content = $rev->getContent();
645 // If the page is blank, use the text from the previous revision,
646 // which can only be blank if there's a move/import/protect dummy revision involved
647 if ( $content->getSize() == 0 ) {
648 $prev = $rev->getPrevious();
// NOTE(review): this re-reads $rev although $prev was just fetched; $prev->getContent() looks like
// the intent, given the comment above — confirm and fix.
651 $content = $rev->getContent();
656 // Find out if there was only one contributor
657 // Only scan the last 20 revisions
// The bitAnd condition excludes revisions that have the DELETED_USER bit set in rev_deleted.
658 $res = $dbw->select( 'revision', 'rev_user_text',
659 array( 'rev_page' => $title->getArticleID(), $dbw->bitAnd( 'rev_deleted', Revision
::DELETED_USER
) . ' = 0' ),
661 array( 'LIMIT' => 20 )
664 if ( $res === false ) {
665 // This page has no revisions, which is very weird
// Reported to the caller through the by-reference parameter.
669 $hasHistory = ( $res->numRows() > 1 );
670 $row = $dbw->fetchObject( $res );
672 if ( $row ) { // $row is false if the only contributor is hidden
673 $onlyAuthor = $row->rev_user_text
;
674 // Try to find a second contributor
675 foreach ( $res as $row ) {
676 if ( $row->rev_user_text
!= $onlyAuthor ) { // Bug 22999
685 // Generate the summary with a '$1' placeholder
// NOTE(review): the conditions selecting between the three messages below are not visible here.
687 // The current revision is blank and the one before is also
688 // blank. It's just not our lucky day
689 $reason = wfMsgForContent( 'exbeforeblank', '$1' );
692 $reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor );
694 $reason = wfMsgForContent( 'excontent', '$1' );
698 if ( $reason == '-' ) {
699 // Allow these UI messages to be blanked out cleanly
703 // Max content length = max comment length - length of the comment (excl. $1)
// 255 is the comment length limit assumed here; the 2 subtracted is the length of the '$1' placeholder.
704 $text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) );
706 // Now replace the '$1' placeholder
707 $reason = str_replace( '$1', $text, $reason );
712 #@TODO: getSecondaryUpdatesForDeletion( Content ) returns an array of DataUpdate objects
713 #... or do that in the Content class?
716 * Get the Content object that needs to be saved in order to undo all revisions
717 * between $undo and $undoafter. Revisions must belong to the same page,
718 * must exist and must not be deleted
722 * @param $current Revision the current text
723 * @param $undo Revision the revision to undo
724 * @param $undoafter Revision Must be an earlier revision than $undo
726 * @return mixed the Content to save on success (as returned by Revision::getContent() or merge3()), false on failure
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
	$currentContent = $current->getContent();

	if ( empty( $currentContent ) ) {
		return false; // no page
	}

	$undoContent = $undo->getContent();
	$restoredContent = $undoafter->getContent();

	// No use doing a merge if it's just a straight revert.
	if ( $currentContent->equals( $undoContent ) ) {
		return $restoredContent;
	}

	// Three-way merge: $undo is the base, $undoafter the state to restore, $current the page as it is now.
	return $this->merge3( $undoContent, $restoredContent, $currentContent );
}
749 * Returns true for content models that support caching using the ParserCache mechanism.
750 * See WikiPage::isParserCacheUser().
// Capability flag consulted for ParserCache use (see WikiPage::isParserCacheUser()).
// NOTE(review): the return statement is not visible in this listing — confirm the default value.
756 public function isParserCacheSupported() {
763 * @param $page WikiPage the page that was deleted (note: $page->getId() must still return the old page ID!)
765 * @return array a list of DataUpdate instances that will clean up the database after deletion.
// Collects the updates to run after $page has been deleted.
767 public function getDeletionUpdates( WikiPage
$page ) {
// The only update visible in this listing; presumably it removes the page's link-table rows — confirm against LinksDeletionUpdate.
769 new LinksDeletionUpdate( $page ),
777 abstract class TextContentHandler
extends ContentHandler
{
/**
 * Forwards the model id and the supported formats to the ContentHandler
 * constructor unchanged.
 *
 * @param int $modelId the content model id
 * @param array $formats list of supported serialization formats
 */
public function __construct( $modelId, $formats ) {
	parent::__construct( $modelId, $formats );
}
/**
 * Returns the content's text as-is: for text-based content the serialized
 * form is simply what getNativeData() yields.
 *
 * @param Content $content the content to serialize
 * @param String|null $format the requested format; validated with checkFormat()
 *
 * @return mixed the value of $content->getNativeData()
 * @throws MWException if $format is not supported by this handler
 */
public function serializeContent( Content $content, $format = null ) {
	$this->checkFormat( $format );

	$text = $content->getNativeData();

	return $text;
}
796 * attempts to merge differences between three versions.
797 * Returns a new Content object for a clean merge and false for failure or a conflict.
799 * All three Content objects passed as parameters must have the same content model.
801 * This text-based implementation uses wfMerge().
803 * @param Content $oldContent
804 * @param Content $myContent
805 * @param Content $yourContent
807 * @return Content|Bool
// Text-based three-way merge: serializes all three versions, merges them with wfMerge() and unserializes the result.
809 public function merge3( Content
$oldContent, Content
$myContent, Content
$yourContent ) {
// Reject content of a foreign model up front; checkModelID() throws on a mismatch.
810 $this->checkModelID( $oldContent->getModel() );
811 $this->checkModelID( $myContent->getModel() );
812 $this->checkModelID( $yourContent->getModel() );
814 $format = $this->getDefaultFormat();
// wfMerge() operates on serialized text, so flatten all three versions with the same format.
816 $old = $this->serializeContent( $oldContent, $format );
817 $mine = $this->serializeContent( $myContent, $format );
818 $yours = $this->serializeContent( $yourContent, $format );
// $result is not set before this call, so wfMerge() presumably fills it in by reference.
820 $ok = wfMerge( $old, $mine, $yours, $result );
// NOTE(review): the conditions guarding the returns below (on $ok and $result) are not visible in this listing.
827 return $this->makeEmptyContent();
// Turn the merged text back into a Content object using the same format.
830 $mergedContent = $this->unserializeContent( $result, $format );
831 return $mergedContent;
/**
 * Content handler for wikitext pages; produces WikitextContent objects.
 */
class WikitextContentHandler extends TextContentHandler {

	/**
	 * Registers CONTENT_FORMAT_WIKITEXT as the only supported format.
	 *
	 * @param int $modelId the content model id, CONTENT_MODEL_WIKITEXT by default
	 */
	public function __construct( $modelId = CONTENT_MODEL_WIKITEXT ) {
		$formats = array( CONTENT_FORMAT_WIKITEXT );

		parent::__construct( $modelId, $formats );
	}

	/**
	 * Wraps the given text in a WikitextContent object.
	 *
	 * @param String $text the serialized (flat) wikitext
	 * @param String|null $format the format of $text; validated with checkFormat()
	 * @return WikitextContent
	 */
	public function unserializeContent( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new WikitextContent( $text );

		return $content;
	}

	/**
	 * Creates a WikitextContent object holding the empty string.
	 *
	 * @return WikitextContent
	 */
	public function makeEmptyContent() {
		return new WikitextContent( '' );
	}
}
859 #XXX: make ScriptContentHandler base class with plugin interface for syntax highlighting?
/**
 * Content handler for JavaScript pages; produces JavaScriptContent objects.
 */
class JavaScriptContentHandler extends TextContentHandler {

	/**
	 * Registers CONTENT_FORMAT_JAVASCRIPT as the only supported format.
	 *
	 * @param int $modelId the content model id, CONTENT_MODEL_JAVASCRIPT by default
	 */
	public function __construct( $modelId = CONTENT_MODEL_JAVASCRIPT ) {
		$formats = array( CONTENT_FORMAT_JAVASCRIPT );

		parent::__construct( $modelId, $formats );
	}

	/**
	 * Wraps the given text in a JavaScriptContent object.
	 *
	 * @param String $text the serialized (flat) JavaScript source
	 * @param String|null $format the format of $text; validated with checkFormat()
	 * @return JavaScriptContent
	 */
	public function unserializeContent( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new JavaScriptContent( $text );

		return $content;
	}

	/**
	 * Creates a JavaScriptContent object holding the empty string.
	 *
	 * @return JavaScriptContent
	 */
	public function makeEmptyContent() {
		return new JavaScriptContent( '' );
	}
}
/**
 * Content handler for CSS pages; produces CssContent objects.
 */
class CssContentHandler extends TextContentHandler {

	/**
	 * Registers CONTENT_FORMAT_CSS as the only supported format.
	 *
	 * @param int $modelId the content model id, CONTENT_MODEL_CSS by default
	 */
	public function __construct( $modelId = CONTENT_MODEL_CSS ) {
		$formats = array( CONTENT_FORMAT_CSS );

		parent::__construct( $modelId, $formats );
	}

	/**
	 * Wraps the given text in a CssContent object.
	 *
	 * @param String $text the serialized (flat) CSS source
	 * @param String|null $format the format of $text; validated with checkFormat()
	 * @return CssContent
	 */
	public function unserializeContent( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new CssContent( $text );

		return $content;
	}

	/**
	 * Creates a CssContent object holding the empty string.
	 *
	 * @return CssContent
	 */
	public function makeEmptyContent() {
		return new CssContent( '' );
	}
}