Merge "Revert "Log the reason why revision->getContent() returns null""
[lhc/web/wiklou.git] / includes / parser / DateFormatter.php
<?php
/**
 * Date formatter
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Date formatter, recognises dates in plain text and formats them according to user preferences.
 *
 * A formatter is built for one Language object: the month names and
 * abbreviations of that language are compiled into a set of regular
 * expressions (one per recognised source layout), and reformat() rewrites
 * every match into the layout selected by the user preference.
 *
 * @todo preferences, OutputPage
 * @ingroup Parser
 */
class DateFormatter {
	/** @var int Source layout constant whose regex is currently being applied by reformat() */
	private $mSource;

	/** @var int Layout constant whose entry in $targets renders the current match */
	private $mTarget;

	/** @var string Regex alternation of every (quoted) month name and abbreviation */
	private $monthNames = '';

	/** @var string[] Complete regexes, indexed by source layout constant */
	private $regexes;

	/** @var int[][] Target overrides, as [preference][source] => target */
	private $rules;

	/** @var int[] Month number (1-12), keyed by lower-cased month name or abbreviation */
	private $xMonths;

	/** @var int[] Layout constant, keyed by user preference name */
	private $preferences;

	/** @var Language */
	private $lang;

	/** @var bool|null Whether the current reformat() pass works on [[linked]] dates */
	private $mLinked;

	/** @var string Tail shared by all regexes: look-ahead, closing delimiter and modifiers */
	private $regexTrail;

	/** @var string[] */
	private $keys;

	/** @var string[] */
	private $targets;

	const ALL = -1;
	const NONE = 0;
	const MDY = 1;
	const DMY = 2;
	const YMD = 3;
	const ISO1 = 4;
	const LASTPREF = 4;
	const ISO2 = 5;
	const YDM = 6;
	const DM = 7;
	const MD = 8;
	const LAST = 8;

	/**
	 * @param Language $lang In which language to format the date
	 */
	public function __construct( Language $lang ) {
		$this->lang = $lang;

		$this->monthNames = $this->getMonthRegex();
		$this->xMonths = [];
		for ( $i = 1; $i <= 12; $i++ ) {
			$this->xMonths[$this->lang->lc( $this->lang->getMonthName( $i ) )] = $i;
			$this->xMonths[$this->lang->lc( $this->lang->getMonthAbbreviation( $i ) )] = $i;
		}

		$this->regexTrail = '(?![a-z])/iu';

		# Partial regular expressions. They are only needed while assembling the
		# real ones below, so they are kept as locals rather than object state.
		$dm = '\[\[(\d{1,2})[ _](' . $this->monthNames . ')\]\]';
		$md = '\[\[(' . $this->monthNames . ')[ _](\d{1,2})\]\]';
		$y = '\[\[(\d{1,4}([ _]BC|))\]\]';
		$iso1 = '\[\[(-?\d{4})]]-\[\[(\d{2})-(\d{2})\]\]';
		$iso2 = '\[\[(-?\d{4})-(\d{2})-(\d{2})\]\]';

		# Separator allowed between the day/month part and the year part
		$sep = '(?: *, *| +)';
		$trail = $this->regexTrail;

		# Real regular expressions
		$this->regexes = [
			self::DMY => "/{$dm}{$sep}{$y}{$trail}",
			self::YDM => "/{$y}{$sep}{$dm}{$trail}",
			self::MDY => "/{$md}{$sep}{$y}{$trail}",
			self::YMD => "/{$y}{$sep}{$md}{$trail}",
			self::DM => "/{$dm}{$trail}",
			self::MD => "/{$md}{$trail}",
			self::ISO1 => "/{$iso1}{$trail}",
			self::ISO2 => "/{$iso2}{$trail}",
		];

		# Extraction keys
		# Character N of a key names the date part held by capture group N+1;
		# a space skips a group (the optional "BC" sub-group of the year).
		# See the switch in formatDate() for the meaning of the letters.
		$this->keys = [
			self::DMY => 'jFY',
			self::YDM => 'Y jF',
			self::MDY => 'FjY',
			self::YMD => 'Y Fj',
			self::DM => 'jF',
			self::MD => 'Fj',
			self::ISO1 => 'ymd', # y means ISO year
			self::ISO2 => 'ymd',
		];

		# Target date formats
		$this->targets = [
			self::DMY => '[[F j|j F]] [[Y]]',
			self::YDM => '[[Y]], [[F j|j F]]',
			self::MDY => '[[F j]], [[Y]]',
			self::YMD => '[[Y]] [[F j]]',
			self::DM => '[[F j|j F]]',
			self::MD => '[[F j]]',
			self::ISO1 => '[[Y|y]]-[[F j|m-d]]',
			self::ISO2 => '[[y-m-d]]',
		];

		# Rules
		#            pref       source      target
		$this->rules = [];
		$this->rules[self::DMY][self::MD] = self::DM;
		$this->rules[self::ALL][self::MD] = self::MD;
		$this->rules[self::MDY][self::DM] = self::MD;
		$this->rules[self::ALL][self::DM] = self::DM;
		$this->rules[self::NONE][self::ISO2] = self::ISO1;

		$this->preferences = [
			'default' => self::NONE,
			'dmy' => self::DMY,
			'mdy' => self::MDY,
			'ymd' => self::YMD,
			'ISO 8601' => self::ISO1,
		];
	}

	/**
	 * Get a DateFormatter object
	 *
	 * Instances are memoised per language code for the lifetime of the
	 * request, and additionally fetched from / stored in the local server
	 * cache for an hour.
	 *
	 * @param Language|null $lang In which language to format the date
	 *     Defaults to the site content language. Passing a string is
	 *     deprecated since 1.31.
	 * @return DateFormatter
	 */
	public static function getInstance( $lang = null ) {
		global $wgContLang, $wgMainCacheType;

		if ( is_string( $lang ) ) {
			wfDeprecated( __METHOD__ . ' with type string for $lang', '1.31' );
		}
		$lang = $lang ? wfGetLangObj( $lang ) : $wgContLang;
		$code = $lang->getCode();

		// Keyed by language code: a single shared static would hand the
		// formatter of whichever language was requested first to every
		// later caller, regardless of the language they asked for.
		static $instances = [];
		if ( !isset( $instances[$code] ) ) {
			$cache = ObjectCache::getLocalServerInstance( $wgMainCacheType );
			$instances[$code] = $cache->getWithSetCallback(
				$cache->makeKey( 'dateformatter', $code ),
				$cache::TTL_HOUR,
				function () use ( $lang ) {
					return new DateFormatter( $lang );
				}
			);
		}

		return $instances[$code];
	}

	/**
	 * Rewrite every recognised date in $text into the layout chosen by
	 * $preference.
	 *
	 * @param string $preference User preference; unknown values are treated
	 *     like 'default'
	 * @param string $text Text to reformat
	 * @param array $options Array can contain 'linked' and/or 'match-whole'.
	 *     'linked' matches dates written as [[wiki links]]; without it the
	 *     link brackets are dropped from the patterns and from the output.
	 *     'match-whole' anchors the patterns to the start and end of $text.
	 *
	 * @return string
	 */
	public function reformat( $preference, $text, $options = [ 'linked' ] ) {
		$linked = in_array( 'linked', $options );
		$match_whole = in_array( 'match-whole', $options );

		$preference = isset( $this->preferences[$preference] )
			? $this->preferences[$preference]
			: self::NONE;

		for ( $source = 1; $source <= self::LAST; $source++ ) {
			$this->mSource = $source;
			if ( isset( $this->rules[$preference][$source] ) ) {
				# Specific rules
				$this->mTarget = $this->rules[$preference][$source];
			} elseif ( isset( $this->rules[self::ALL][$source] ) ) {
				# General rules
				$this->mTarget = $this->rules[self::ALL][$source];
			} elseif ( $preference ) {
				# User preference
				$this->mTarget = $preference;
			} else {
				# Default
				$this->mTarget = $source;
			}
			$regex = $this->regexes[$source];

			// Horrible hack: derive the unlinked pattern by deleting the
			// escaped link brackets from the linked one.
			if ( !$linked ) {
				$regex = str_replace( [ '\[\[', '\]\]' ], '', $regex );
			}

			if ( $match_whole ) {
				// Let's hope this works
				$regex = preg_replace( '!^/!', '/^', $regex );
				$regex = str_replace( $this->regexTrail,
					'$' . $this->regexTrail, $regex );
			}

			// Another horrible hack: replace() only receives the matches, so
			// the linked flag is passed to it through object state.
			$this->mLinked = $linked;
			$result = preg_replace_callback( $regex, [ $this, 'replace' ], $text );
			$this->mLinked = null;

			// preg_replace_callback() returns null on a PCRE error (e.g.
			// invalid UTF-8 under the /u modifier). Keep the text as it was
			// instead of discarding it.
			if ( $result !== null ) {
				$text = $result;
			}
		}
		return $text;
	}

	/**
	 * Regexp replacement callback
	 *
	 * Maps the capture groups of the current source pattern onto named date
	 * parts using the extraction key of that pattern, then formats them.
	 *
	 * @param array $matches
	 * @return string
	 */
	private function replace( $matches ) {
		# Extract information from $matches
		$linked = isset( $this->mLinked ) ? $this->mLinked : true;

		$bits = [];
		$key = $this->keys[$this->mSource];
		$keyLength = strlen( $key );
		for ( $p = 0; $p < $keyLength; $p++ ) {
			if ( $key[$p] != ' ' ) {
				$bits[$key[$p]] = $matches[$p + 1];
			}
		}

		return $this->formatDate( $bits, $matches[0], $linked );
	}

	/**
	 * Render extracted date parts in the current target layout, wrapped in a
	 * <span class="mw-formatted-date"> whose title is the ISO form of the date.
	 *
	 * @param array $bits Date parts keyed by format letter (j, F, Y, y, m, d)
	 * @param string $orig Original input string, to be returned
	 *  on formatting failure.
	 * @param bool $link Whether to keep the [[link]] markup of the target format
	 * @return string
	 */
	private function formatDate( $bits, $orig, $link = true ) {
		$format = $this->targets[$this->mTarget];

		if ( !$link ) {
			// Strip piped links, keeping the label. The link target must not
			// be allowed to run across "]]": otherwise in '[[Y]], [[F j|j F]]'
			// the first "[[" pairs with the pipe of the second link and the
			// year is lost.
			$format = preg_replace( '/\[\[[^|\]]+\|([^\]]+)\]\]/', '$1', $format );
			// strip remaining links
			$format = str_replace( [ '[[', ']]' ], '', $format );
		}

		// Pre-generate y/Y stuff because we need the year for the <span> title.
		if ( !isset( $bits['y'] ) && isset( $bits['Y'] ) ) {
			$bits['y'] = $this->makeIsoYear( $bits['Y'] );
		}
		if ( !isset( $bits['Y'] ) && isset( $bits['y'] ) ) {
			$bits['Y'] = $this->makeNormalYear( $bits['y'] );
		}

		if ( !isset( $bits['m'] ) ) {
			$m = $this->makeIsoMonth( $bits['F'] );
			if ( !$m || $m == '00' ) {
				// Month name not recognised: give the input back untouched.
				return $orig;
			}
			$bits['m'] = $m;
		}

		if ( !isset( $bits['d'] ) ) {
			$bits['d'] = sprintf( '%02d', $bits['j'] );
		}

		# Construct new date
		$text = '';
		$formatLength = strlen( $format );
		for ( $p = 0; $p < $formatLength; $p++ ) {
			$char = $format[$p];
			switch ( $char ) {
				case 'd': # ISO day of month
					$text .= $bits['d'];
					break;
				case 'm': # ISO month
					$text .= $bits['m'];
					break;
				case 'y': # ISO year
					$text .= $bits['y'];
					break;
				case 'j': # ordinary day of month
					if ( !isset( $bits['j'] ) ) {
						$text .= intval( $bits['d'] );
					} else {
						$text .= $bits['j'];
					}
					break;
				case 'F': # long month
					if ( !isset( $bits['F'] ) ) {
						$m = intval( $bits['m'] );
						if ( $m > 12 || $m < 1 ) {
							// This occurs when parsing a date with a month
							// outside the bounds of possibilities.
							return $orig;
						}
						$text .= $this->lang->getMonthName( $m );
					} else {
						$text .= ucfirst( $bits['F'] );
					}
					break;
				case 'Y': # ordinary (optional BC) year
					$text .= $bits['Y'];
					break;
				default:
					$text .= $char;
			}
		}

		$isoBits = [];
		if ( isset( $bits['y'] ) ) {
			$isoBits[] = $bits['y'];
		}
		$isoBits[] = $bits['m'];
		$isoBits[] = $bits['d'];
		$isoDate = implode( '-', $isoBits );

		// Output is not strictly HTML (it's wikitext), but <span> is whitelisted.
		return Html::rawElement( 'span',
			[ 'class' => 'mw-formatted-date', 'title' => $isoDate ], $text );
	}

	/**
	 * Return a regex that can be used to find month names in string
	 *
	 * Every name is quoted for use inside a "/"-delimited pattern, so that
	 * characters such as "." in an abbreviation only match themselves.
	 *
	 * @return string regex to find the months with
	 */
	private function getMonthRegex() {
		$names = [];
		for ( $i = 1; $i <= 12; $i++ ) {
			$names[] = preg_quote( $this->lang->getMonthName( $i ), '/' );
			$names[] = preg_quote( $this->lang->getMonthAbbreviation( $i ), '/' );
		}
		return implode( '|', $names );
	}

	/**
	 * Makes an ISO month, e.g. 02, from a month name
	 * @param string $monthName Month name
	 * @return string ISO month name, or '00' if the name is not known
	 */
	private function makeIsoMonth( $monthName ) {
		$key = $this->lang->lc( $monthName );
		$n = isset( $this->xMonths[$key] ) ? $this->xMonths[$key] : 0;
		return sprintf( '%02d', $n );
	}

	/**
	 * Make an ISO year from a year name, for instance: '-1199' from '1200 BC'
	 * @param string $year Year name
	 * @return string ISO year name
	 */
	private function makeIsoYear( $year ) {
		# Assumes the year is in a nice format, as enforced by the regex
		if ( substr( $year, -2 ) == 'BC' ) {
			# Drop the 3-character " BC" (or "_BC") suffix; ISO years are
			# offset by one from BC years.
			$num = intval( substr( $year, 0, -3 ) ) - 1;
			# PHP bug note: sprintf( "%04d", -1 ) fails poorly
			return sprintf( '-%04d', $num );
		}
		return sprintf( '%04d', $year );
	}

	/**
	 * Make a year one from an ISO year, for instance: '400 BC' from '-0399'.
	 * @param string $iso ISO year
	 * @return int|string int representing year number in case of AD dates, or string containing
	 *    year number and 'BC' at the end otherwise.
	 */
	private function makeNormalYear( $iso ) {
		if ( $iso[0] == '-' ) {
			return ( intval( substr( $iso, 1 ) ) + 1 ) . ' BC';
		}
		return intval( $iso );
	}
}