Merge "RevisionStoreDbTestBase, remove redundant needsDB override"
[lhc/web/wiklou.git] / includes / content / TextContent.php
1 <?php
2 /**
3 * Content object implementation for representing flat text.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @since 1.21
21 *
22 * @file
23 * @ingroup Content
24 *
25 * @author Daniel Kinzler
26 */
27
28 use MediaWiki\MediaWikiServices;
29
30 /**
31 * Content object implementation for representing flat text.
32 *
33 * TextContent instances are immutable
34 *
35 * @ingroup Content
36 */
37 class TextContent extends AbstractContent {
38
39 /**
40 * @var string
41 */
42 protected $mText;
43
/**
 * Builds a text content object around the given string.
 *
 * A null or false $text is tolerated: a warning is emitted through wfWarn()
 * and the empty string is stored instead. Any other non-string value is rejected.
 *
 * @param string $text The text this object represents.
 * @param string $model_id Content model ID, handed to the parent constructor.
 * @throws MWException If $text is not a string (and is neither null nor false).
 */
public function __construct( $text, $model_id = CONTENT_MODEL_TEXT ) {
	parent::__construct( $model_id );

	$isMissing = ( $text === null || $text === false );

	if ( $isMissing ) {
		$shown = var_export( $text, true );
		wfWarn(
			"TextContent constructed with \$text = " . $shown . "! "
				. "This may indicate an error in the caller's scope.",
			2
		);

		// Fall back to empty text rather than failing outright.
		$text = '';
	} elseif ( !is_string( $text ) ) {
		throw new MWException( "TextContent expects a string in the constructor." );
	}

	$this->mText = $text;
}
65
/**
 * Returns this very instance instead of creating a clone.
 *
 * @note Mutable subclasses MUST override this to return a copy!
 *
 * @return Content $this
 */
public function copy() {
	// Handing out the same object is fine because TextContent instances are immutable.
	return $this;
}
74
/**
 * Returns a single-line, truncated rendition of the text.
 *
 * Every newline and carriage return is replaced by a space, then the result
 * is passed to the content language's truncateForDatabase().
 *
 * @param int $maxlength Length limit handed to truncateForDatabase();
 *  values below 0 are treated as 0.
 *
 * @return string Whatever truncateForDatabase() produced for the flattened text.
 */
public function getTextForSummary( $maxlength = 250 ) {
	$rawText = $this->getNativeData();
	$contLang = MediaWikiServices::getInstance()->getContentLanguage();

	$singleLine = preg_replace( "/[\n\r]/", ' ', $rawText );
	$limit = max( 0, $maxlength );

	return $contLang->truncateForDatabase( $singleLine, $limit );
}
83
/**
 * Reports the length of the text as measured by strlen(), i.e. in bytes
 * rather than characters.
 *
 * @return int
 */
public function getSize() {
	return strlen( $this->getNativeData() );
}
94
/**
 * Tells whether this content counts as an article: it does if it is not a
 * redirect and $wgArticleCountMethod is set to "any".
 *
 * @param bool|null $hasLinks If it is known whether this content contains links,
 *  provide this information here, to avoid redundant parsing to find out.
 *  This implementation does not consult the value.
 *
 * @return bool
 */
public function isCountable( $hasLinks = null ) {
	global $wgArticleCountMethod;

	return !$this->isRedirect() && $wgArticleCountMethod === 'any';
}
117
/**
 * Gives access to the string this Content object wraps.
 *
 * @return string The raw text, exactly as stored by the constructor.
 */
public function getNativeData() {
	return $this->mText;
}
126
/**
 * Returns the text to be handed to the search index. For plain text this
 * is simply the raw text, unmodified.
 *
 * @return string The raw text.
 */
public function getTextForSearchIndex() {
	return $this->getNativeData();
}
135
/**
 * Attempts to convert this content object to wikitext and returns the
 * resulting text string. The conversion may be lossy.
 *
 * @note this allows any text-based content to be transcluded as if it was wikitext.
 *
 * @return string|bool The raw text, or false if the conversion failed.
 */
public function getWikitextForTransclusion() {
	$converted = $this->convert( CONTENT_MODEL_WIKITEXT, 'lossy' );

	// convert() yields false when no conversion is available.
	return $converted ? $converted->getNativeData() : false;
}
153
/**
 * Strips trailing whitespace from the end of the text, then rewrites
 * "\r\n" and lone "\r" line endings as "\n".
 *
 * Only whitespace at the very end of the text is removed; whitespace at
 * the end of interior lines is left alone.
 *
 * This was formerly part of Parser::preSaveTransform, but
 * for non-wikitext content models they probably still want
 * to normalize line endings without all of the other PST
 * changes.
 *
 * @since 1.28
 * @param string $text
 * @return string
 */
public static function normalizeLineEndings( $text ) {
	$trimmed = rtrim( $text );

	// "\r\n" is listed first so a Windows line ending becomes one "\n", not two.
	return str_replace( [ "\r\n", "\r" ], "\n", $trimmed );
}
170
/**
 * Returns a Content object with pre-save transformations applied.
 *
 * This implementation only normalizes line endings. When that leaves the
 * text untouched, the current object itself is returned; otherwise a new
 * instance of the same class and model is created for the normalized text.
 *
 * At a minimum, subclasses should make sure to call TextContent::normalizeLineEndings()
 * either directly or part of Parser::preSaveTransform().
 *
 * @param Title $title
 * @param User $user
 * @param ParserOptions $popts
 *
 * @return Content
 */
public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) {
	$original = $this->getNativeData();
	$normalized = self::normalizeLineEndings( $original );

	if ( $normalized === $original ) {
		// Nothing changed, so no new object is needed.
		return $this;
	}

	return new static( $normalized, $this->getModel() );
}
189
/**
 * Computes a line-based diff between this content object and another one.
 *
 * Both texts are run through the language's segmentForDiff() and split on
 * newlines before being handed to Diff.
 *
 * @since 1.21
 *
 * @param Content $that The other content object to compare this content object to.
 *  Its model is validated with checkModelID().
 * @param Language|null $lang The language object to use for text segmentation.
 *  If not given, the content language is used.
 *
 * @return Diff A diff representing the changes that would have to be
 *  made to this content object to make it equal to $that.
 */
public function diff( Content $that, Language $lang = null ) {
	$this->checkModelID( $that->getModel() );

	// @todo could implement this in DifferenceEngine and just delegate here?

	if ( $lang === null ) {
		$lang = MediaWikiServices::getInstance()->getContentLanguage();
	}

	$thisText = $this->getNativeData();
	$thatText = $that->getNativeData();

	# Note: Use native PHP diff, external engines don't give us abstract output
	$fromLines = explode( "\n", $lang->segmentForDiff( $thisText ) );
	$toLines = explode( "\n", $lang->segmentForDiff( $thatText ) );

	return new Diff( $fromLines, $toLines );
}
222
/**
 * Fills the provided ParserOutput object with information derived from the content.
 * Unless $generateHtml was false, this includes an HTML representation of the content
 * provided by getHtml(); otherwise the output text is set to the empty string.
 *
 * For content models listed in $wgTextModelsToParse, this method will call the MediaWiki
 * wikitext parser on the text to extract any (wikitext) links, magic words, etc. In that
 * case $output is replaced by the parser's result before the text is set.
 *
 * Subclasses may override this to provide custom content processing.
 * For custom HTML generation alone, it is sufficient to override getHtml().
 *
 * @param Title $title Context title for parsing
 * @param int $revId Revision ID (for {{REVISIONID}})
 * @param ParserOptions $options
 * @param bool $generateHtml Whether or not to generate HTML
 * @param ParserOutput &$output The output object to fill (reference).
 */
protected function fillParserOutput( Title $title, $revId,
	ParserOptions $options, $generateHtml, ParserOutput &$output
) {
	global $wgParser, $wgTextModelsToParse;

	$shouldParse = in_array( $this->getModel(), $wgTextModelsToParse );

	if ( $shouldParse ) {
		// parse just to get links etc into the database, HTML is replaced below.
		$output = $wgParser->parse( $this->getNativeData(), $title, $options, true, true, $revId );
	}

	$html = $generateHtml ? $this->getHtml() : '';

	$output->clearWrapperDivClass();
	$output->setText( $html );
}
259
/**
 * Produces the HTML rendering of the content that fillParserOutput()
 * stores in the ParserOutput object.
 *
 * Subclasses may override this to provide a custom HTML rendering.
 * If further information is to be derived from the content (such as
 * categories), the fillParserOutput() method can be overridden instead.
 *
 * For backwards-compatibility, this default implementation just calls
 * getHighlightHtml().
 *
 * @return string An HTML representation of the content
 */
protected function getHtml() {
	// Delegates so that subclasses still overriding getHighlightHtml() keep working.
	return $this->getHighlightHtml();
}
276
/**
 * Generates an HTML version of the content, for display.
 *
 * This default implementation returns an HTML-escaped version
 * of the raw text content. The escaping flags are fixed to ENT_COMPAT
 * (double quotes escaped, single quotes left alone) so that the output
 * is the same on every PHP version.
 *
 * @note The functionality of this method should really be implemented
 * in getHtml(), and subclasses should override getHtml() if needed.
 * getHighlightHtml() is kept around for backward compatibility with
 * extensions that already override it.
 *
 * @deprecated since 1.24. Use getHtml() instead. In particular, subclasses overriding
 * getHighlightHtml() should override getHtml() instead.
 *
 * @return string An HTML representation of the content
 */
protected function getHighlightHtml() {
	// PHP 8.1 changed the default flags of htmlspecialchars() from ENT_COMPAT to
	// ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401. Passing ENT_COMPAT explicitly keeps
	// the historical output instead of letting it vary with the PHP version.
	return htmlspecialchars( $this->getNativeData(), ENT_COMPAT );
}
296
/**
 * This implementation provides lossless conversion between content models based
 * on TextContent.
 *
 * The parent implementation is tried first. Only if it reports failure is the
 * target model's handler looked up; when that handler is a TextContentHandler,
 * the raw text is passed to its unserializeContent().
 *
 * @param string $toModel The desired content model, use the CONTENT_MODEL_XXX flags.
 * @param string $lossy Flag, set to "lossy" to allow lossy conversion. If lossy conversion is not
 *  allowed, full round-trip conversion is expected to work without losing information.
 *
 * @return Content|bool A content object with the content model $toModel, or false if that
 *  conversion is not supported.
 *
 * @see Content::convert()
 */
public function convert( $toModel, $lossy = '' ) {
	$result = parent::convert( $toModel, $lossy );

	if ( $result !== false ) {
		return $result;
	}

	$targetHandler = ContentHandler::getForModelID( $toModel );

	if ( !( $targetHandler instanceof TextContentHandler ) ) {
		// Not a text-based model: conversion is not supported here.
		return false;
	}

	// NOTE: ignore content serialization format - it's just text anyway.
	return $targetHandler->unserializeContent( $this->getNativeData() );
}
327
328 }