75fe391fe85dae78a3ec9b5e0f012d7713e07659
[lhc/web/wiklou.git] / includes / Content.php
1 <?php
/**
 * A content object represents page content, e.g. the text to show on a page.
 * Content objects have no knowledge about how they relate to Wiki pages.
 *
 * Most of the non-abstract methods here are thin conveniences that delegate to the
 * ContentHandler responsible for this object's content model, or harmless defaults
 * (no redirect, no sections, no transformations) that subclasses may override.
 *
 * @since WD.1
 */
abstract class Content {

	/**
	 * Id of the content model this Content object represents, as passed to the constructor.
	 * Use with CONTENT_MODEL_XXX constants.
	 *
	 * @var int|null $model_id
	 */
	protected $model_id;

	/**
	 * @since WD.1
	 *
	 * @return String a string representing the content in a way useful for building a full text search index.
	 *         If no useful representation exists, this method returns an empty string.
	 *
	 * @todo: test that this actually works
	 * @todo: make sure this also works with LuceneSearch / WikiSearch
	 */
	abstract public function getTextForSearchIndex();

	/**
	 * @since WD.1
	 *
	 * @return String|false the wikitext to include when another page includes this content, or false if the
	 *         content is not includable in a wikitext page.
	 *
	 * @TODO: allow native handling, bypassing wikitext representation, like for includable special pages.
	 * @TODO: use in parser, etc!
	 */
	abstract public function getWikitextForTransclusion();

	/**
	 * Returns a textual representation of the content suitable for use in edit summaries and log messages.
	 *
	 * @since WD.1
	 *
	 * @param int $maxlength maximum length of the summary text
	 * @return String the summary text
	 */
	abstract public function getTextForSummary( $maxlength = 250 );

	/**
	 * Returns native representation of the data. Interpretation depends on the data model used,
	 * as given by getModel().
	 *
	 * @since WD.1
	 *
	 * @return mixed the native representation of the content. Could be a string, a nested array
	 *         structure, an object, a binary blob... anything, really.
	 *
	 * @NOTE: review all calls carefully, caller must be aware of content model!
	 */
	abstract public function getNativeData();

	/**
	 * Returns the content's nominal size in bogo-bytes.
	 *
	 * @return int
	 */
	abstract public function getSize();

	/**
	 * @param int|null $model_id id of the content model (a CONTENT_MODEL_XXX constant)
	 */
	public function __construct( $model_id = null ) {
		$this->model_id = $model_id;
	}

	/**
	 * Returns the id of the content model used by this content object.
	 * Corresponds to the CONTENT_MODEL_XXX constants.
	 *
	 * @since WD.1
	 *
	 * @return int the model id
	 */
	public function getModel() {
		return $this->model_id;
	}

	/**
	 * Throws an MWException if $model_id is not the id of the content model
	 * supported by this Content object. The comparison is strict (!==), so the
	 * type of the id matters as well as its value.
	 *
	 * @param int $model_id the model to check
	 *
	 * @throws MWException
	 */
	protected function checkModelID( $model_id ) {
		if ( $model_id === $this->model_id ) {
			return;
		}

		$model_name = ContentHandler::getContentModelName( $model_id );
		$own_model_name = ContentHandler::getContentModelName( $this->model_id );

		throw new MWException( "Bad content model: expected {$this->model_id} ($own_model_name) but got $model_id ($model_name)." );
	}

	/**
	 * Convenience method that returns the ContentHandler singleton for handling the content
	 * model this Content object uses.
	 *
	 * Shorthand for ContentHandler::getForContent( $this )
	 *
	 * @since WD.1
	 *
	 * @return ContentHandler
	 */
	public function getContentHandler() {
		return ContentHandler::getForContent( $this );
	}

	/**
	 * Convenience method that returns the default serialization format for the content
	 * model this Content object uses.
	 *
	 * Shorthand for $this->getContentHandler()->getDefaultFormat()
	 *
	 * @since WD.1
	 *
	 * @return String the default serialization format, as reported by the ContentHandler
	 */
	public function getDefaultFormat() {
		return $this->getContentHandler()->getDefaultFormat();
	}

	/**
	 * Convenience method that returns the list of serialization formats supported
	 * for the content model this Content object uses.
	 *
	 * Shorthand for $this->getContentHandler()->getSupportedFormats()
	 *
	 * @since WD.1
	 *
	 * @return array of supported serialization formats
	 */
	public function getSupportedFormats() {
		return $this->getContentHandler()->getSupportedFormats();
	}

	/**
	 * Returns true if $format is a supported serialization format for this Content object,
	 * false if it isn't.
	 *
	 * Note that this will always return true if $format is null (or otherwise falsy),
	 * because that stands for the default serialization.
	 *
	 * Shorthand for $this->getContentHandler()->isSupportedFormat( $format )
	 *
	 * @since WD.1
	 *
	 * @param String $format the format to check
	 * @return bool whether the format is supported
	 */
	public function isSupportedFormat( $format ) {
		if ( !$format ) {
			return true; // this means "use the default"
		}

		return $this->getContentHandler()->isSupportedFormat( $format );
	}

	/**
	 * Throws an MWException if $this->isSupportedFormat( $format ) doesn't return true.
	 *
	 * @param String $format the format to check
	 * @throws MWException
	 */
	protected function checkFormat( $format ) {
		if ( !$this->isSupportedFormat( $format ) ) {
			throw new MWException( "Format $format is not supported for content model " . $this->getModel() );
		}
	}

	/**
	 * Convenience method for serializing this Content object.
	 *
	 * Shorthand for $this->getContentHandler()->serializeContent( $this, $format )
	 *
	 * @since WD.1
	 *
	 * @param null|String $format the desired serialization format (or null for the default format).
	 * @return String serialized form of this Content object
	 */
	public function serialize( $format = null ) {
		return $this->getContentHandler()->serializeContent( $this, $format );
	}

	/**
	 * Returns true if this Content object represents empty content,
	 * i.e. if getSize() reports zero.
	 *
	 * @since WD.1
	 *
	 * @return bool whether this Content object is empty
	 */
	public function isEmpty() {
		return $this->getSize() == 0;
	}

	/**
	 * Returns if the content is valid. This is intended for local validity checks, not considering global consistency.
	 * It needs to be valid before it can be saved.
	 *
	 * This default implementation always returns true.
	 *
	 * @since WD.1
	 *
	 * @return boolean
	 */
	public function isValid() {
		return true;
	}

	/**
	 * Returns true if this Content object is conceptually equivalent to the given Content object.
	 *
	 * Will return false if $that is null.
	 * Will return true if $that === $this.
	 * Will return false if $that->getModel() !== $this->getModel().
	 * Will return false if $that->getNativeData() is not equal to $this->getNativeData(),
	 * where the meaning of "equal" depends on the actual data model. This default
	 * implementation compares the native data with ===.
	 *
	 * Implementations should be careful to make equals() symmetric and transitive:
	 *
	 * * $a->equals( $b ) <=> $b->equals( $a )
	 * * $a->equals( $b ) && $b->equals( $c ) ==> $a->equals( $c )
	 *
	 * @since WD.1
	 *
	 * @param Content $that the Content object to compare to
	 * @return bool true if this Content object is equal to $that, false otherwise.
	 */
	public function equals( Content $that = null ) {
		if ( $that === null ) {
			return false;
		}

		if ( $that === $this ) {
			return true;
		}

		if ( $that->getModel() !== $this->getModel() ) {
			return false;
		}

		return $this->getNativeData() === $that->getNativeData();
	}

	/**
	 * Return a copy of this Content object. The following must be true for the object returned
	 * if $copy = $original->copy()
	 *
	 * * get_class($original) === get_class($copy)
	 * * $original->getModel() === $copy->getModel()
	 * * $original->equals( $copy )
	 *
	 * If and only if the Content object is immutable, the copy() method can and should
	 * return $this. That is, $copy === $original may be true, but only for immutable content
	 * objects.
	 *
	 * @since WD.1
	 *
	 * @return Content A copy of this object
	 */
	abstract public function copy();

	/**
	 * Returns true if this content is countable as a "real" wiki page, provided
	 * that it's also in a countable location (e.g. a current revision in the main namespace).
	 *
	 * @since WD.1
	 *
	 * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here,
	 *        to avoid redundant parsing to find out.
	 * @return boolean
	 */
	abstract public function isCountable( $hasLinks = null );

	/**
	 * Convenience method, shorthand for
	 * $this->getContentHandler()->getParserOutput( $this, $title, $revId, $options, $generateHtml )
	 *
	 * @note: subclasses should NOT override this to provide custom rendering.
	 *        Override ContentHandler::getParserOutput() instead!
	 *
	 * @param Title $title
	 * @param null $revId
	 * @param null|ParserOptions $options
	 * @param Boolean $generateHtml whether to generate Html (default: true). If false,
	 *        the result of calling getText() on the ParserOutput object returned by
	 *        this method is undefined.
	 *
	 * @since WD.1
	 *
	 * @return ParserOutput
	 */
	public function getParserOutput( Title $title, $revId = null, ParserOptions $options = null, $generateHtml = true ) {
		return $this->getContentHandler()->getParserOutput( $this, $title, $revId, $options, $generateHtml );
	}

	/**
	 * Construct the redirect destination from this content and return an
	 * array of Titles, or null if this content doesn't represent a redirect.
	 * The last element in the array is the final destination after all redirects
	 * have been resolved (up to $wgMaxRedirects times).
	 *
	 * This default implementation always returns null (not a redirect).
	 *
	 * @since WD.1
	 *
	 * @return Array|null of Titles, with the destination last
	 */
	public function getRedirectChain() {
		return null;
	}

	/**
	 * Construct the redirect destination from this content and return a Title,
	 * or null if this content doesn't represent a redirect.
	 * This will only return the immediate redirect target, useful for
	 * the redirect table and other checks that don't need full recursion.
	 *
	 * This default implementation always returns null (not a redirect).
	 *
	 * @since WD.1
	 *
	 * @return Title|null The corresponding Title
	 */
	public function getRedirectTarget() {
		return null;
	}

	/**
	 * Construct the redirect destination from this content and return the
	 * Title, or null if this content doesn't represent a redirect.
	 * This will recurse down $wgMaxRedirects times or until a non-redirect target is hit
	 * in order to provide (hopefully) the Title of the final destination instead of another redirect.
	 *
	 * This default implementation always returns null (not a redirect).
	 *
	 * @since WD.1
	 *
	 * @return Title|null
	 */
	public function getUltimateRedirectTarget() {
		return null;
	}

	/**
	 * Returns true if getRedirectTarget() yields something other than null.
	 *
	 * @since WD.1
	 *
	 * @return bool
	 */
	public function isRedirect() {
		return $this->getRedirectTarget() !== null;
	}

	/**
	 * Returns the section with the given id.
	 *
	 * The default implementation returns null.
	 *
	 * @since WD.1
	 *
	 * @param String $sectionId the section's id, given as a numeric string. The id "0" retrieves the section before
	 *        the first heading, "1" the text between the first heading (included) and the second heading (excluded), etc.
	 * @return Content|Boolean|null the section, or false if no such section exist, or null if sections are not supported
	 */
	public function getSection( $sectionId ) {
		return null;
	}

	/**
	 * Replaces a section of the content and returns a Content object with the section replaced.
	 *
	 * The default implementation returns null.
	 *
	 * @since WD.1
	 *
	 * @param $section empty/null/false or a section number (0, 1, 2, T1, T2...), or "new"
	 * @param $with Content: new content of the section
	 * @param $sectionTitle String: new section's subject, only if $section is 'new'
	 * @return Content|null Complete article content, or null if error
	 */
	public function replaceSection( $section, Content $with, $sectionTitle = '' ) {
		return null;
	}

	/**
	 * Returns a Content object with pre-save transformations applied (or this object if no transformations apply).
	 *
	 * The default implementation returns this object unchanged.
	 *
	 * @since WD.1
	 *
	 * @param Title $title
	 * @param User $user
	 * @param ParserOptions $popts
	 * @return Content
	 */
	public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) {
		return $this;
	}

	/**
	 * Returns a Content object with the given section heading prepended, if supported.
	 * The default implementation just returns this Content object unmodified, ignoring the section header.
	 *
	 * @since WD.1
	 *
	 * @param $header String
	 * @return Content
	 */
	public function addSectionHeader( $header ) {
		return $this;
	}

	/**
	 * Returns a Content object with preload transformations applied (or this object if no transformations apply).
	 *
	 * The default implementation returns this object unchanged.
	 *
	 * @since WD.1
	 *
	 * @param Title $title
	 * @param ParserOptions $popts
	 * @return Content
	 */
	public function preloadTransform( Title $title, ParserOptions $popts ) {
		return $this;
	}

	# TODO: handle ImagePage and CategoryPage
	# TODO: make sure we cover lucene search / wikisearch.
	# TODO: make sure ReplaceTemplates still works
	# FUTURE: nice&sane integration of GeSHi syntax highlighting
	#   [11:59] <vvv> Hooks are ugly; make CodeHighlighter interface and a config to set the class which handles syntax highlighting
	#   [12:00] <vvv> And default it to a DummyHighlighter

	# TODO: make sure we cover the external editor interface (does anyone actually use that?!)

	# TODO: tie into API to provide contentModel for Revisions
	# TODO: tie into API to provide serialized version and contentFormat for Revisions
	# TODO: tie into API edit interface
	# FUTURE: make EditForm plugin for EditPage
}
442 # FUTURE: special type for redirects?!
443 # FUTURE: MultipartMultipart < WikipageContent (Main + Links + X)
444 # FUTURE: LinksContent < LanguageLinksContent, CategoriesContent
445
446 // @TODO: add support for ar_content_format, ar_content_model, rev_content_format, rev_content_model to API
447
/**
 * Content object implementation for representing flat text.
 *
 * TextContent instances are immutable: the text is set once in the constructor and
 * none of the methods here modify it.
 *
 * @since WD.1
 */
abstract class TextContent extends Content {

	/**
	 * The raw text, as passed to the constructor.
	 *
	 * Declared explicitly instead of being created on the fly by the constructor;
	 * kept public because the implicitly created property was public too.
	 *
	 * @var String $mText
	 */
	public $mText;

	/**
	 * @param String $text the raw text this object represents
	 * @param int|null $model_id id of the content model, passed on to Content::__construct()
	 */
	public function __construct( $text, $model_id = null ) {
		parent::__construct( $model_id );

		$this->mText = $text;
	}

	/**
	 * Returns this very object.
	 *
	 * @return Content
	 */
	public function copy() {
		return $this; #NOTE: this is ok since TextContent are immutable.
	}

	/**
	 * Returns the text with line breaks replaced by spaces, truncated by the content
	 * language's truncate() method.
	 *
	 * @param int $maxlength maximum length of the summary text; negative values are treated as 0
	 * @return String the summary text
	 */
	public function getTextForSummary( $maxlength = 250 ) {
		global $wgContLang;

		$text = $this->getNativeData();

		$truncatedtext = $wgContLang->truncate(
			preg_replace( "/[\n\r]/", ' ', $text ),
			max( 0, $maxlength ) );

		return $truncatedtext;
	}

	/**
	 * Returns the text's size in bytes.
	 *
	 * @return int the size
	 */
	public function getSize() {
		$text = $this->getNativeData();
		return strlen( $text );
	}

	/**
	 * Returns true if this content is not a redirect, and $wgArticleCountMethod is "any".
	 *
	 * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here,
	 *        to avoid redundant parsing to find out. Not used by this implementation.
	 *
	 * @return bool true if the content is countable
	 */
	public function isCountable( $hasLinks = null ) {
		global $wgArticleCountMethod;

		if ( $this->isRedirect() ) {
			return false;
		}

		if ( $wgArticleCountMethod === 'any' ) {
			return true;
		}

		return false;
	}

	/**
	 * Returns the text represented by this Content object, as a string.
	 *
	 * @return String the raw text
	 */
	public function getNativeData() {
		$text = $this->mText;
		return $text;
	}

	/**
	 * Returns the text represented by this Content object, as a string.
	 *
	 * @return String the raw text
	 */
	public function getTextForSearchIndex() {
		return $this->getNativeData();
	}

	/**
	 * Returns the text represented by this Content object, as a string.
	 *
	 * @return String the raw text
	 */
	public function getWikitextForTransclusion() {
		return $this->getNativeData();
	}

	/**
	 * Diff this content object with another content object.
	 *
	 * Both texts are segmented with the given language's segmentForDiff() and split
	 * into lines before being handed to Diff.
	 *
	 * @since WD.diff
	 *
	 * @param Content $that the other content object to compare this content object to
	 * @param Language $lang the language object to use for text segmentation. If not given, $wgContLang is used.
	 *
	 * @throws MWException if $that uses a different content model than this object
	 * @return Diff a diff representing the changes that would have to be made to this content object
	 *         to make it equal to $that.
	 */
	public function diff( Content $that, Language $lang = null ) {
		global $wgContLang;

		$this->checkModelID( $that->getModel() );

		#@todo: could implement this in DifferenceEngine and just delegate here?

		if ( !$lang ) {
			$lang = $wgContLang;
		}

		$otext = $this->getNativeData();
		// The "new" side must come from $that; reading it from $this would diff the
		// text against itself and always yield an empty diff.
		$ntext = $that->getNativeData();

		# Note: Use native PHP diff, external engines don't give us abstract output
		// Segment with $lang so that a caller-supplied language is actually honoured.
		$ota = explode( "\n", $lang->segmentForDiff( $otext ) );
		$nta = explode( "\n", $lang->segmentForDiff( $ntext ) );

		return new Diff( $ota, $nta );
	}

}
572
/**
 * Content object for wikitext. Section handling, pre-save/preload transformations and
 * redirect detection are delegated to $wgParser and Title.
 *
 * @since WD.1
 */
class WikitextContent extends TextContent {

	/**
	 * @param String $text the wikitext
	 */
	public function __construct( $text ) {
		parent::__construct( $text, CONTENT_MODEL_WIKITEXT );
	}

	/**
	 * Returns the section with the given id.
	 *
	 * @param String $section the section's id
	 *
	 * @return Content|false|null the section, or false if no such section exist, or null if sections are not supported
	 */
	public function getSection( $section ) {
		global $wgParser;

		$text = $this->getNativeData();
		// false is handed to the parser as the "section not found" default, so that a
		// missing section can be told apart from an empty one.
		$sect = $wgParser->getSection( $text, $section, false );

		if ( $sect === false ) {
			return false;
		}

		return new WikitextContent( $sect );
	}

	/**
	 * Replaces a section in the wikitext
	 *
	 * @param $section empty/null/false or a section number (0, 1, 2, T1, T2...), or "new"
	 * @param $with Content: new content of the section
	 * @param $sectionTitle String: new section's subject, only if $section is 'new'
	 *
	 * @throws MWException if $with uses a different content model than this object
	 * @return Content Complete article content, or null if error
	 */
	public function replaceSection( $section, Content $with, $sectionTitle = '' ) {
		wfProfileIn( __METHOD__ );

		$myModelId = $this->getModel();
		$sectionModelId = $with->getModel();

		if ( $sectionModelId != $myModelId ) {
			$myModelName = ContentHandler::getContentModelName( $myModelId );
			$sectionModelName = ContentHandler::getContentModelName( $sectionModelId );

			// Close the profiling section on every exit path, including this one.
			wfProfileOut( __METHOD__ );
			throw new MWException( "Incompatible content model for section: document uses $myModelId ($myModelName), "
				. "section uses $sectionModelId ($sectionModelName)." );
		}

		$oldtext = $this->getNativeData();
		$text = $with->getNativeData();

		// strval() maps '', null and false to the empty string, which are the values
		// documented as "replace the whole page"; "0" is a real section and is not caught here.
		if ( strval( $section ) === '' ) {
			wfProfileOut( __METHOD__ );
			return $with; #XXX: copy first?
		} elseif ( $section === 'new' ) {
			// Strict comparison: with ==, an integer 0 would compare equal to 'new'
			// on PHP versions before 8.

			# Inserting a new section
			$subject = $sectionTitle ? wfMsgForContent( 'newsectionheaderdefaultlevel', $sectionTitle ) . "\n\n" : '';
			if ( wfRunHooks( 'PlaceNewSection', array( $this, $oldtext, $subject, &$text ) ) ) {
				$text = strlen( trim( $oldtext ) ) > 0
					? "{$oldtext}\n\n{$subject}{$text}"
					: "{$subject}{$text}";
			}
		} else {
			# Replacing an existing section; roll out the big guns
			global $wgParser;

			$text = $wgParser->replaceSection( $oldtext, $section, $text );
		}

		$newContent = new WikitextContent( $text );

		wfProfileOut( __METHOD__ );
		return $newContent;
	}

	/**
	 * Returns a new WikitextContent object with the given section heading prepended.
	 *
	 * @param $header String
	 * @return Content
	 */
	public function addSectionHeader( $header ) {
		$text = wfMsgForContent( 'newsectionheaderdefaultlevel', $header ) . "\n\n" . $this->getNativeData();

		return new WikitextContent( $text );
	}

	/**
	 * Returns a Content object with pre-save transformations applied, as performed by
	 * $wgParser->preSaveTransform().
	 *
	 * @param Title $title
	 * @param User $user
	 * @param ParserOptions $popts
	 * @return Content
	 */
	public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { #FIXME: also needed for JS/CSS!
		global $wgParser;

		$text = $this->getNativeData();
		$pst = $wgParser->preSaveTransform( $text, $title, $user, $popts );

		return new WikitextContent( $pst );
	}

	/**
	 * Returns a Content object with preload transformations applied, as performed by
	 * $wgParser->getPreloadText().
	 *
	 * @param Title $title
	 * @param ParserOptions $popts
	 * @return Content
	 */
	public function preloadTransform( Title $title, ParserOptions $popts ) {
		global $wgParser;

		$text = $this->getNativeData();
		$plt = $wgParser->getPreloadText( $text, $title, $popts );

		return new WikitextContent( $plt );
	}

	/**
	 * Shorthand for Title::newFromRedirectArray() applied to this object's wikitext.
	 *
	 * @return Array|null of Titles, with the destination last
	 */
	public function getRedirectChain() {
		$text = $this->getNativeData();
		return Title::newFromRedirectArray( $text );
	}

	/**
	 * Shorthand for Title::newFromRedirect() applied to this object's wikitext.
	 *
	 * @return Title|null
	 */
	public function getRedirectTarget() {
		$text = $this->getNativeData();
		return Title::newFromRedirect( $text );
	}

	/**
	 * Shorthand for Title::newFromRedirectRecurse() applied to this object's wikitext.
	 *
	 * @return Title|null
	 */
	public function getUltimateRedirectTarget() {
		$text = $this->getNativeData();
		return Title::newFromRedirectRecurse( $text );
	}

	/**
	 * Returns true if this content is not a redirect, and this content's text is countable according to
	 * the criteria defined by $wgArticleCountMethod:
	 *
	 * * 'any': always countable
	 * * 'comma': countable if the text contains a comma
	 * * 'link': countable if the content has links; parsed on demand if $hasLinks is null
	 *
	 * Any other value of $wgArticleCountMethod makes the content not countable.
	 *
	 * @param Bool $hasLinks if it is known whether this content contains links, provide this information here,
	 *        to avoid redundant parsing to find out.
	 * @param null|Title $title the title to parse against if links need to be looked up; if not given,
	 *        the title of the main RequestContext is used.
	 *
	 * @return bool true if the content is countable
	 */
	public function isCountable( $hasLinks = null, Title $title = null ) {
		global $wgArticleCountMethod;

		if ( $this->isRedirect() ) {
			return false;
		}

		$text = $this->getNativeData();

		switch ( $wgArticleCountMethod ) {
			case 'any':
				return true;
			case 'comma':
				return strpos( $text, ',' ) !== false;
			case 'link':
				if ( $hasLinks === null ) { # not known, find out
					if ( !$title ) {
						$context = RequestContext::getMain();
						$title = $context->getTitle();
					}

					// No HTML is needed here, only the link list.
					$po = $this->getParserOutput( $title, null, null, false );
					$links = $po->getLinks();
					$hasLinks = !empty( $links );
				}

				return $hasLinks;
		}

		return false;
	}

	/**
	 * Returns the summary text produced by TextContent::getTextForSummary(), with a
	 * link left unfinished at the very end (e.g. by truncation) reduced to its plain text.
	 *
	 * @param int $maxlength maximum length of the summary text
	 * @return String the summary text
	 */
	public function getTextForSummary( $maxlength = 250 ) {
		$truncatedtext = parent::getTextForSummary( $maxlength );

		#clean up unfinished links
		#XXX: make this optional? wasn't there in autosummary, but required for deletion summary.
		$truncatedtext = preg_replace( '/\[\[([^\]]*)\]?$/', '$1', $truncatedtext );

		return $truncatedtext;
	}

}
764
/**
 * Content object wrapping an interface message, identified by its message key.
 * The text is not stored; it is fetched through wfMsgExt() whenever it is requested.
 *
 * @since WD.1
 */
class MessageContent extends TextContent {

	/**
	 * Declared explicitly instead of being created on the fly by the constructor;
	 * kept public because the implicitly created properties were public too.
	 *
	 * @var String $mMessageKey key of the message
	 */
	public $mMessageKey;

	/**
	 * @var mixed $mParameters message parameters as passed to the constructor, or null for none
	 */
	public $mParameters;

	/**
	 * @var array $mOptions wfMsgExt() options as passed to the constructor, normalized to an array
	 */
	public $mOptions;

	/**
	 * @param String $msg_key key of the message
	 * @param null|array|String $params parameters to substitute into the message, or null for none
	 * @param null|array|String $options wfMsgExt() options; a single option may be given as a string
	 */
	public function __construct( $msg_key, $params = null, $options = null ) {
		parent::__construct( null, CONTENT_MODEL_WIKITEXT ); #XXX: messages may be wikitext, html or plain text! and maybe even something else entirely.

		$this->mMessageKey = $msg_key;

		$this->mParameters = $params;

		if ( is_null( $options ) ) {
			$options = array();
		} elseif ( is_string( $options ) ) {
			$options = array( $options );
		}

		$this->mOptions = $options;
	}

	/**
	 * Returns the message as rendered HTML, using the options supplied to the constructor plus "parse".
	 *
	 * @return String the message text, parsed
	 */
	public function getHtml() {
		$opt = array_merge( $this->mOptions, array( 'parse' ) );

		return $this->fetchMessage( $opt );
	}

	/**
	 * Returns the message as raw text, using the options supplied to the constructor minus "parse" and "parseinline".
	 *
	 * @return String the message text, unparsed.
	 */
	public function getNativeData() {
		$opt = array_diff( $this->mOptions, array( 'parse', 'parseinline' ) );

		return $this->fetchMessage( $opt );
	}

	/**
	 * Calls wfMsgExt() for this object's message key with the given options.
	 *
	 * wfMsgExt() takes the options as its second argument and the message parameters
	 * as any further arguments, so the parameters are appended after the options here.
	 *
	 * @param array $options the wfMsgExt() options to use
	 * @return String the message text
	 */
	private function fetchMessage( array $options ) {
		$args = array( $this->mMessageKey, $options );

		if ( $this->mParameters !== null ) {
			// (array) lets a single scalar parameter be passed without wrapping it.
			$args = array_merge( $args, array_values( (array)$this->mParameters ) );
		}

		return call_user_func_array( 'wfMsgExt', $args );
	}

}
809
/**
 * TextContent whose content model is CONTENT_MODEL_JAVASCRIPT.
 * Everything else is inherited from TextContent.
 *
 * @since WD.1
 */
class JavaScriptContent extends TextContent {

	/**
	 * @param String $text the raw text, handed to TextContent unchanged
	 */
	public function __construct( $text ) {
		parent::__construct( $text, CONTENT_MODEL_JAVASCRIPT );
	}

}
819
/**
 * TextContent whose content model is CONTENT_MODEL_CSS.
 * Everything else is inherited from TextContent.
 *
 * @since WD.1
 */
class CssContent extends TextContent {

	/**
	 * @param String $text the raw text, handed to TextContent unchanged
	 */
	public function __construct( $text ) {
		parent::__construct( $text, CONTENT_MODEL_CSS );
	}

}