some documentation for ContentHandler
[lhc/web/wiklou.git] / includes / ContentHandler.php
1 <?php
2
/**
 * Marker subclass of MWException; it declares no members of its own,
 * so it differs from MWException only by its class name.
 */
class MWContentSerializationException extends MWException {
}
6
7
/**
 * A content handler knows how to deal with a specific type of content on a wiki page.
 * Content is stored in the database in a serialized form (using a serialization format, a.k.a. MIME type)
 * and is unserialized into its native PHP representation (the content model), which is wrapped in
 * an instance of the appropriate subclass of Content.
 *
 * Some content types have a flat model, that is, their native representation is the
 * same as their serialized form. Examples would be JavaScript and CSS code. As of now,
 * this also applies to wikitext (MediaWiki's default content type), but wikitext
 * content may be represented by a DOM or AST structure in the future.
 *
 */
20 abstract class ContentHandler {
21
/**
 * Convenience function for getting flat text from a Content object. This should only
 * be used for backwards compatibility with code that cannot handle Content objects yet.
 *
 * Behaviour, in order:
 *  - a null $content yields the empty string;
 *  - a TextContent yields whatever $content->getNativeData() returns;
 *  - otherwise the global $wgContentHandlerTextFallback decides:
 *      'fail'      => an MWException is thrown,
 *      'serialize' => $content->serialize() is returned,
 *      anything else => null is returned.
 *
 * @static
 * @param Content|null $content
 * @return null|string the textual form of $content, if available
 * @throws MWException if $content is not a TextContent and $wgContentHandlerTextFallback is 'fail'.
 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	if ( !$content ) {
		return '';
	}

	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	# Non-text content: the configured fallback policy applies (loose comparison, as before).
	switch ( $wgContentHandlerTextFallback ) {
		case 'fail':
			throw new MWException( "Attempt to get text from Content with model " . $content->getModelName() );
		case 'serialize':
			return $content->serialize();
		default:
			return null;
	}
}
56
/**
 * Convenience function for creating a Content object from a textual representation.
 *
 * The handler for the requested model is looked up via getForModelName() and asked to
 * unserialize $text using $format. When $modelName is empty, the model is taken from
 * $title->getContentModelName().
 *
 * @static
 * @param $text String the textual representation to unserialize
 * @param Title $title the title of the page this text belongs to; only consulted when $modelName is empty
 * @param null|String $modelName the model to deserialize to; defaults to $title->getContentModelName()
 * @param null|String $format the serialization format, passed through to the handler's unserialize()
 *
 * @return Content a Content object representing $text
 */
public static function makeContent( $text, Title $title, $modelName = null, $format = null ) {
	$effectiveModel = $modelName ? $modelName : $title->getContentModelName();

	return ContentHandler::getForModelName( $effectiveModel )->unserialize( $text, $format );
}
79
/**
 * Returns the name of the default content model to be used for the page with the given title.
 *
 * Note: There should rarely be need to call this method directly.
 * To determine the actual content model for a given page, use Title::getContentModelName().
 *
 * The decision is made in this order:
 *  1. $wgNamespaceContentModels may supply a model for the title's namespace.
 *  2. The DefaultModelFor hook may change that model; if the hook run returns false
 *     and a model is set, that model is returned immediately.
 *  3. NS_MEDIAWIKI pages whose text ends in .css or .js are flagged as CSS/JS pages;
 *     the TitleIsCssOrJsPage hook may override that flag.
 *  4. NS_USER subpages (text containing a slash) ending in .js or .css are flagged
 *     as JS/CSS subpages, unless step 3 already flagged the page.
 *  5. The page counts as wikitext if the model is unset or CONTENT_MODEL_WIKITEXT and
 *     neither flag is set; the TitleIsWikitextPage hook may override that.
 *  6. Non-wikitext pages get the JavaScript or CSS model according to the matched
 *     extension, else the model from steps 1/2, else CONTENT_MODEL_TEXT.
 *
 * Note: this is used by, and may thus not use, Title::getContentModelName()
 *
 * @static
 * @param Title $title
 * @return null|string default model name for the page given by $title
 */
public static function getDefaultModelFor( Title $title ) {
	global $wgNamespaceContentModels;

	# NOTE: this method must not rely on $title->getContentModelName() directly or indirectly,
	#       because it is used to initialize the mContentModelName member.

	$namespace = $title->getNamespace();
	$extension = false;
	$matches = null;

	# Per-namespace default, if one is configured.
	$model = empty( $wgNamespaceContentModels[ $namespace ] )
		? null
		: $wgNamespaceContentModels[ $namespace ];

	# hook can determine default model
	$hookResult = wfRunHooks( 'DefaultModelFor', array( $title, &$model ) ); #FIXME: document new hook!
	if ( !$hookResult && $model ) {
		return $model;
	}

	# Could this page contain custom CSS or JavaScript, based on the title?
	$isCssOrJsPage = false;
	if ( NS_MEDIAWIKI == $namespace ) {
		$isCssOrJsPage = (bool)preg_match( '!\.(css|js)$!u', $title->getText(), $matches );
	}
	if ( $isCssOrJsPage ) {
		$extension = $matches[1];
	}

	# hook can force js/css
	wfRunHooks( 'TitleIsCssOrJsPage', array( $title, &$isCssOrJsPage ) );

	# Is this a .js or .css subpage of a user page?
	$isJsCssSubpage = false;
	if ( NS_USER == $namespace && !$isCssOrJsPage ) {
		$isJsCssSubpage = (bool)preg_match( "/\\/.*\\.(js|css)$/", $title->getText(), $matches );
	}
	if ( $isJsCssSubpage ) {
		$extension = $matches[1];
	}

	# is this wikitext, according to $wgNamespaceContentModels or the DefaultModelFor hook?
	$modelAllowsWikitext = ( $model == CONTENT_MODEL_WIKITEXT || $model === null );
	$isWikitext = ( $modelAllowsWikitext && !$isCssOrJsPage && !$isJsCssSubpage );

	# hook can override $isWikitext
	wfRunHooks( 'TitleIsWikitextPage', array( $title, &$isWikitext ) );

	if ( $isWikitext ) {
		# we established that it must be wikitext
		return CONTENT_MODEL_WIKITEXT;
	}

	if ( $extension == 'js' ) {
		return CONTENT_MODEL_JAVASCRIPT;
	}
	if ( $extension == 'css' ) {
		return CONTENT_MODEL_CSS;
	}

	return $model ? $model : CONTENT_MODEL_TEXT;
}
157
/**
 * Returns the ContentHandler singleton matching the content model of the given title,
 * as reported by $title->getContentModelName().
 *
 * @static
 * @param Title $title
 * @return ContentHandler
 * @throws MWException propagated from getForModelName() if no handler is known for the model.
 */
public static function getForTitle( Title $title ) {
	return ContentHandler::getForModelName( $title->getContentModelName() );
}
169
/**
 * Returns the ContentHandler singleton matching the model of the given Content object,
 * as reported by $content->getModelName().
 *
 * @static
 * @param Content $content
 * @return ContentHandler
 * @throws MWException propagated from getForModelName() if no handler is known for the model.
 */
public static function getForContent( Content $content ) {
	return ContentHandler::getForModelName( $content->getModelName() );
}
181
/**
 * Returns the ContentHandler singleton for the given model name. Use the CONTENT_MODEL_XXX
 * constants to identify the desired content model.
 *
 * Singletons are taken from the global $wgContentHandlers array. Keys in that array are
 * model names; values are either ContentHandler objects or strings naming the appropriate
 * ContentHandler subclass.
 *
 * If a class name is found, the class is instantiated (with the model name as its only
 * constructor argument) and the resulting object replaces the class name in $wgContentHandlers.
 *
 * If $wgContentHandlers has no (non-empty) entry for $modelName, the ContentHandlerForModelName
 * hook gets a chance to supply one, as an object or a class name. If it does not,
 * an MWException is thrown.
 *
 * @static
 * @param $modelName String the name of the content model for which to get a handler. Use CONTENT_MODEL_XXX constants.
 * @return ContentHandler the ContentHandler singleton for handling the model given by $modelName
 * @throws MWException if no handler is known for $modelName.
 */
public static function getForModelName( $modelName ) {
	global $wgContentHandlers;

	if ( empty( $wgContentHandlers[$modelName] ) ) {
		$fromHook = null;
		wfRunHooks( "ContentHandlerForModelName", array( $modelName, &$fromHook ) ); #FIXME: document new hook

		if ( !$fromHook ) {
			throw new MWException( "No handler for model $modelName registered in \$wgContentHandlers" );
		}

		# NOTE: may be a string or an object, either is fine!
		$wgContentHandlers[$modelName] = $fromHook;
	}

	$entry = $wgContentHandlers[$modelName];

	if ( is_string( $entry ) ) {
		# Lazily turn the class name into the singleton and remember it.
		$entry = new $entry( $modelName );
		$wgContentHandlers[$modelName] = $entry;
	}

	return $entry;
}
222
223 # ----------------------------------------------------------------------------------------------------------
224
/**
 * @var String name of the content model this handler is responsible for
 */
protected $mModelName;

/**
 * @var array list of supported serialization formats; the first entry is the default
 */
protected $mSupportedFormats;

/**
 * Constructor, initializing the ContentHandler instance with its model name and a list of supported formats.
 * Values for the parameters are typically provided as literals by subclasses' constructors.
 *
 * The two members are declared explicitly above so they are no longer created as
 * dynamic properties on first assignment.
 *
 * @param String $modelName (use CONTENT_MODEL_XXX constants).
 * @param array $formats list of supported serialization formats (typically as MIME types);
 *        getDefaultFormat() returns element 0 of this list.
 */
public function __construct( $modelName, $formats ) {
	$this->mModelName = $modelName;
	$this->mSupportedFormats = $formats;
}
236
/**
 * Returns the model name this handler was constructed with.
 *
 * Examples given by the original author: "wikitext" for wikitext (possibly wikiast or
 * wikidom in the future), "wikidata" for wikidata.
 *
 * @return String the model name
 */
public function getModelName() {
	return $this->mModelName;
}
246
/**
 * Asserts that the given model name is identical (===) to this handler's model name.
 *
 * @param $modelName String the model name to check
 * @throws MWException if $modelName differs from this handler's model name
 */
protected function checkModelName( $modelName ) {
	if ( $modelName === $this->mModelName ) {
		return;
	}

	throw new MWException( "Bad content model: expected " . $this->mModelName . " but got found " . $modelName );
}
252
/**
 * Returns the list of serialization formats this handler was constructed with.
 *
 * Examples given by the original author: "text/x-mediawiki-1", "text/x-mediawiki-2" for
 * wikitext; "application/json", "application/x-php" for wikidata.
 *
 * @return array list of format names
 */
public function getSupportedFormats() {
	return $this->mSupportedFormats;
}
258
/**
 * Returns the default serialization format, which is the first entry (index 0)
 * of the supported formats list.
 *
 * @return String the default format
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;
	return $formats[0];
}
262
/**
 * Tells whether the given format can be used with this handler.
 *
 * An empty $format (null, '', ...) means "use the default" and is always accepted.
 * Otherwise the format must occur in the supported formats list (loose in_array() match).
 *
 * @param $format String|null the format to check
 * @return bool
 */
public function isSupportedFormat( $format ) {
	return !$format || in_array( $format, $this->mSupportedFormats );
}
268
/**
 * Asserts that the given format is acceptable according to isSupportedFormat().
 *
 * @param $format String|null the format to check
 * @throws MWException if the format is not supported
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException( "Format $format is not supported for content model " . $this->getModelName() );
}
274
/**
 * Serializes a Content object into a string. Implemented by subclasses.
 *
 * @abstract
 * @param Content $content the content to serialize
 * @param null|String $format the serialization format; null is the default value
 * @return String the serialized form
 */
abstract public function serialize( Content $content, $format = null );
282
/**
 * Turns a serialized blob back into a Content object. Implemented by subclasses.
 *
 * @abstract
 * @param $blob String the serialized form
 * @param null|String $format the serialization format; null is the default value
 * @return Content
 */
abstract public function unserialize( $blob, $format = null );
290
/**
 * Creates an empty Content object for this handler's model. Implemented by subclasses;
 * the text-based handlers in this file build it from the empty string.
 *
 * @abstract
 * @return Content
 */
abstract public function emptyContent();
292
/**
 * Return an Article object suitable for viewing the given title.
 *
 * NOTE: does *not* do special handling for Image and Category pages!
 *       Use Article::newFromTitle() for that!
 *
 * @param Title $title
 * @return Article
 * @throws MWException if the title's content model is not the one this handler serves
 * @todo Article is being refactored into an action class, keep track of that
 */
public function createArticle( Title $title ) {
	$this->checkModelName( $title->getContentModelName() );

	return new Article( $title );
}
309
/**
 * Return an EditPage object suitable for editing the given article.
 *
 * @param Article $article
 * @return EditPage
 * @throws MWException if the article's content model is not the one this handler serves
 */
public function createEditPage( Article $article ) {
	$this->checkModelName( $article->getContentModelName() );

	return new EditPage( $article );
}
322
/**
 * Return an ExternalEdit object suitable for editing the page of the given context.
 *
 * @param IContextSource $context the context whose title identifies the page
 * @return ExternalEdit
 * @throws MWException if the title's content model is not the one this handler serves
 */
public function createExternalEdit( IContextSource $context ) {
	# A Title exposes its model via getContentModelName() (as used by createArticle()
	# and getForTitle()); getModelName() belongs to Content / ContentHandler.
	$this->checkModelName( $context->getTitle()->getContentModelName() );

	$externalEdit = new ExternalEdit( $context );
	return $externalEdit;
}
335
/**
 * Factory for a DifferenceEngine operating on the page of the given context.
 * All arguments are passed through to the DifferenceEngine constructor unchanged.
 *
 * @param $context IContextSource context to use, anything else will be ignored
 * @param $old Integer old ID we want to show and diff with.
 * @param $new String|Integer either 'prev' or 'next' (default 0).
 * @param $rcid Integer ??? FIXME (default 0)
 * @param $refreshCache boolean If set, refreshes the diff cache
 * @param $unhide boolean If set, allow viewing deleted revs
 * @return DifferenceEngine
 * @throws MWException if the title's content model is not the one this handler serves
 */
public function getDifferenceEngine( IContextSource $context, $old = 0, $new = 0, $rcid = 0, #FIXME: use everywhere!
									 $refreshCache = false, $unhide = false ) {

	# A Title exposes its model via getContentModelName() (as used by createArticle()
	# and getForTitle()); getModelName() belongs to Content / ContentHandler.
	$this->checkModelName( $context->getTitle()->getContentModelName() );

	$de = new DifferenceEngine( $context, $old, $new, $rcid, $refreshCache, $unhide );

	return $de;
}
354
/**
 * Attempts to merge differences between three versions.
 * Implementations return a new Content object for a clean merge and false for
 * failure or a conflict.
 *
 * This default implementation does not attempt a merge and always returns false.
 *
 * @param $oldContent Content
 * @param $myContent Content
 * @param $yourContent Content
 * @return Content|Bool
 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	# No generic merge strategy exists at this level.
	return false;
}
369
/**
 * Return an applicable autosummary if one exists for the given edit.
 *
 * Checked in this order:
 *  - redirect summary, when the new content has a redirect target that the old content
 *    lacks, or that differs from the old one (including its fragment);
 *  - new page summary, when EDIT_NEW is set in $flags and the new content is not empty;
 *  - blanking summary, when non-empty old content becomes empty;
 *  - replacement summary, when the old content is more than ten times the size of the
 *    new content and the new content is smaller than 500.
 *
 * A null content object is treated as having no redirect target and size 0.
 *
 * NOTE: $flags is required even though it follows two parameters with defaults; the
 *       parameter order is kept for compatibility with existing callers.
 *
 * @param $oldContent Content|null: the previous content of the page.
 * @param $newContent Content|null: the submitted content of the page.
 * @param $flags Int bitmask: a bitmask of flags submitted for the edit.
 *
 * @return string An appropriate autosummary, or an empty string.
 */
public function getAutosummary( Content $oldContent = null, Content $newContent = null, $flags ) {
	# Decide what kind of autosummary is needed.

	# Redirect autosummaries.
	# The targets must be derived from the content objects; testing $ot / $rt themselves
	# (which are not set yet at this point) would always yield false.
	$ot = $oldContent ? $oldContent->getRedirectTarget() : false;
	$rt = $newContent ? $newContent->getRedirectTarget() : false;

	if ( is_object( $rt ) && ( !is_object( $ot ) || !$rt->equals( $ot ) || $ot->getFragment() != $rt->getFragment() ) ) {

		$truncatedtext = $newContent->getTextForSummary(
			250
				- strlen( wfMsgForContent( 'autoredircomment' ) )
				- strlen( $rt->getFullText() ) );

		return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext );
	}

	# Both content objects are optional, so never dereference them unguarded.
	$oldSize = $oldContent ? $oldContent->getSize() : 0;
	$newSize = $newContent ? $newContent->getSize() : 0;

	# New page autosummaries
	if ( ( $flags & EDIT_NEW ) && $newSize > 0 ) {
		# If they're making a new article, give its text, truncated, in the summary.

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMsgForContent( 'autosumm-new' ) ) );

		return wfMsgForContent( 'autosumm-new', $truncatedtext );
	}

	# Blanking autosummaries
	if ( $oldSize > 0 && $newSize == 0 ) {
		return wfMsgForContent( 'autosumm-blank' );
	} elseif ( $newContent && $oldSize > 10 * $newSize && $newSize < 500 ) {
		# Removing more than 90% of the article

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) );

		return wfMsgForContent( 'autosumm-replace', $truncatedtext );
	}

	# If we reach this point, there's no applicable autosummary for our case, so our
	# autosummary is empty.
	return '';
}
424
/**
 * Auto-generates a deletion reason.
 *
 * The reason quotes (truncated) page content. If the latest revision is empty and a
 * previous revision exists, the previous revision's content is quoted instead. If all of
 * the (up to 20) scanned revisions with a visible user name share one author, that
 * author is named in the reason.
 *
 * @param $title Title: the page's title
 * @param &$hasHistory Boolean: set to whether the revision query returned more than one row
 * @return mixed String containing deletion reason or empty string, or boolean false
 *    if no revision occurred
 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbw = wfGetDB( DB_MASTER );

	// Get the last revision
	$rev = Revision::newFromTitle( $title );

	if ( is_null( $rev ) ) {
		return false;
	}

	// Get the article's contents
	$content = $rev->getContent();
	$blank = false;

	// If the page is blank, use the content of the previous revision,
	// which can only be blank if there's a move/import/protect dummy revision involved
	if ( $content->getSize() == 0 ) {
		$prev = $rev->getPrevious();

		if ( $prev ) {
			// Must read from $prev here; re-reading $rev would just yield the blank content again.
			$content = $prev->getContent();
			$blank = true;
		}
	}

	// Find out if there was only one contributor
	// Only scan the last 20 revisions
	$res = $dbw->select( 'revision', 'rev_user_text',
		array( 'rev_page' => $title->getArticleID(), $dbw->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0' ),
		__METHOD__,
		array( 'LIMIT' => 20 )
	);

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );
	$row = $dbw->fetchObject( $res );

	if ( $row ) { // $row is false if the only contributor is hidden
		$onlyAuthor = $row->rev_user_text;
		// Try to find a second contributor
		foreach ( $res as $row ) {
			if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
				$onlyAuthor = false;
				break;
			}
		}
	} else {
		$onlyAuthor = false;
	}

	// Generate the summary with a '$1' placeholder
	if ( $blank ) {
		// The current revision is blank, so $content holds the previous revision's content
		$reason = wfMsgForContent( 'exbeforeblank', '$1' );
	} else {
		if ( $onlyAuthor ) {
			$reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor );
		} else {
			$reason = wfMsgForContent( 'excontent', '$1' );
		}
	}

	if ( $reason == '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) );

	// Now replace the '$1' placeholder
	$reason = str_replace( '$1', $text, $reason );

	return $reason;
}
515
/**
 * Get the Content object that needs to be saved in order to undo all revisions
 * between $undo and $undoafter. Revisions must belong to the same page,
 * must exist and must not be deleted.
 *
 * If the current content equals the content of $undo, the content of $undoafter is
 * returned directly (straight revert). Otherwise the result of a three-way merge via
 * merge3() is returned, which is false when the merge fails.
 *
 * @param $current Revision the current revision of the page
 * @param $undo Revision the latest revision to be undone
 * @param $undoafter null|Revision Must be an earlier revision than $undo. Although the
 *        signature allows null, there is nothing to revert to without it, so null
 *        results in failure (false).
 * @return mixed Content on success, false on failure
 */
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter = null ) {
	$cur_content = $current->getContent();

	if ( empty( $cur_content ) ) {
		return false; // no page
	}

	if ( $undoafter === null ) {
		// Previously this caused a fatal error (method call on null).
		return false;
	}

	$undo_content = $undo->getContent();
	$undoafter_content = $undoafter->getContent();

	if ( empty( $undo_content ) || empty( $undoafter_content ) ) {
		// Content unavailable; merge3() requires Content objects.
		return false;
	}

	if ( $cur_content->equals( $undo_content ) ) {
		# No use doing a merge if it's just a straight revert.
		return $undoafter_content;
	}

	$undone_content = $this->merge3( $undo_content, $undoafter_content, $cur_content );

	return $undone_content;
}
543 }
544
545
/**
 * Base class for handlers of text-based content: serialization returns the content's
 * native data as is, and three-way merging is delegated to wfMerge().
 */
abstract class TextContentHandler extends ContentHandler {

	/**
	 * @param String $modelName (use CONTENT_MODEL_XXX constants).
	 * @param array $formats list of supported serialization formats
	 */
	public function __construct( $modelName, $formats ) {
		parent::__construct( $modelName, $formats );
	}

	/**
	 * Returns the native data of the given content object.
	 *
	 * @param Content $content
	 * @param null|String $format the format to serialize to; must be supported by this handler
	 * @return mixed whatever $content->getNativeData() returns
	 * @throws MWException if $format is not supported
	 */
	public function serialize( Content $content, $format = null ) {
		$this->checkFormat( $format );
		return $content->getNativeData();
	}

	/**
	 * Attempts to merge differences between three versions.
	 * Returns a new Content object for a clean merge and false for failure or a conflict.
	 *
	 * This text-based implementation serializes all three versions using the default
	 * format, merges them using wfMerge(), and unserializes the result.
	 *
	 * @param $oldContent Content
	 * @param $myContent Content
	 * @param $yourContent Content
	 * @return Content|Bool
	 * @throws MWException if $oldContent does not use this handler's content model
	 */
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		$this->checkModelName( $oldContent->getModelName() );
		#TODO: check that all Content objects have the same content model! #XXX: what to do if they don't?

		$format = $this->getDefaultFormat();

		$old = $this->serialize( $oldContent, $format );
		$mine = $this->serialize( $myContent, $format );
		$yours = $this->serialize( $yourContent, $format );

		$result = null;
		$ok = wfMerge( $old, $mine, $yours, $result );

		if ( !$ok ) {
			return false;
		}

		# Only a genuinely empty result maps to empty content. A plain truthiness test
		# would also discard a merged text consisting of just "0".
		if ( $result === null || $result === false || $result === '' ) {
			return $this->emptyContent();
		}

		$mergedContent = $this->unserialize( $result, $format );
		return $mergedContent;
	}

}
/**
 * Content handler for wikitext; produces WikitextContent objects.
 */
class WikitextContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName defaults to CONTENT_MODEL_WIKITEXT
	 */
	public function __construct( $modelName = CONTENT_MODEL_WIKITEXT ) {
		$supportedFormats = array( 'application/x-wikitext' ); #FIXME: mime
		parent::__construct( $modelName, $supportedFormats );
	}

	/**
	 * Wraps the given text in a WikitextContent object.
	 *
	 * @param $text String the wikitext
	 * @param null|String $format must be a format supported by this handler
	 * @return WikitextContent
	 * @throws MWException if $format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new WikitextContent( $text );
		return $content;
	}

	/**
	 * @return WikitextContent built from the empty string
	 */
	public function emptyContent() {
		$content = new WikitextContent( "" );
		return $content;
	}

}
607
608 #TODO: make ScriptContentHandler base class with plugin interface for syntax highlighting!
609
/**
 * Content handler for JavaScript; produces JavaScriptContent objects.
 */
class JavaScriptContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName defaults to CONTENT_MODEL_JAVASCRIPT (the previous default,
	 *        CONTENT_MODEL_WIKITEXT, made a default-constructed handler report the wrong model).
	 */
	public function __construct( $modelName = CONTENT_MODEL_JAVASCRIPT ) {
		parent::__construct( $modelName, array( 'text/javascript' ) ); #XXX: or use $wgJsMimeType? this is for internal storage, not HTTP...
	}

	/**
	 * Wraps the given text in a JavaScriptContent object.
	 *
	 * @param $text String the JavaScript code
	 * @param null|String $format must be a format supported by this handler
	 * @return JavaScriptContent
	 * @throws MWException if $format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		# Validate the format, as WikitextContentHandler::unserialize() does.
		$this->checkFormat( $format );

		return new JavaScriptContent( $text );
	}

	/**
	 * @return JavaScriptContent built from the empty string
	 */
	public function emptyContent() {
		return new JavaScriptContent( "" );
	}
}
624
/**
 * Content handler for CSS; produces CssContent objects.
 */
class CssContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName defaults to CONTENT_MODEL_CSS (the previous default,
	 *        CONTENT_MODEL_WIKITEXT, made a default-constructed handler report the wrong model).
	 */
	public function __construct( $modelName = CONTENT_MODEL_CSS ) {
		parent::__construct( $modelName, array( 'text/css' ) );
	}

	/**
	 * Wraps the given text in a CssContent object.
	 *
	 * @param $text String the CSS code
	 * @param null|String $format must be a format supported by this handler
	 * @return CssContent
	 * @throws MWException if $format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		# Validate the format, as WikitextContentHandler::unserialize() does.
		$this->checkFormat( $format );

		return new CssContent( $text );
	}

	/**
	 * @return CssContent built from the empty string
	 */
	public function emptyContent() {
		return new CssContent( "" );
	}

}