Split parser related files to have one class in one file
[lhc/web/wiklou.git] / includes / parser / DateFormatter.php
1 <?php
2 /**
3 * Date formatter
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\MediaWikiServices;
25
26 /**
27 * Date formatter. Recognises dates and formats them according to a specified preference.
28 *
29 * This class was originally introduced to detect and transform dates in free text. It is now
30 * only used by the {{#dateformat}} parser function. This is a very rudimentary date formatter;
31 * Language::sprintfDate() has many more features and is the correct choice for most new code.
32 * The main advantage of this date formatter is that it is able to format incomplete dates with an
33 * unspecified year.
34 *
35 * @ingroup Parser
36 */
class DateFormatter {
	/** @var string[] Date format regexes indexed by the class constants */
	private $regexes;

	/**
	 * @var int[][] Array of special rules. The first key is the preference ID
	 * (one of the class constants), the second key is the detected source
	 * format, and the value is the ID of the target format that will be used
	 * in that case.
	 */
	private $rules = [];

	/**
	 * @var int[] Month numbers by lowercase name
	 */
	private $xMonths = [];

	/**
	 * @var string[] Month names by number
	 */
	private $monthNames = [];

	/**
	 * @var int[] A map of descriptive preference text to internal format ID
	 */
	private $preferenceIDs;

	/** @var string[] Format strings similar to those used by date(), indexed by ID */
	private $targetFormats;

	/** Used as a preference ID for rules that apply regardless of preference */
	const ALL = -1;

	/** No preference: the date may be left in the same format as the input */
	const NONE = 0;

	/** e.g. January 15, 2001 */
	const MDY = 1;

	/** e.g. 15 January 2001 */
	const DMY = 2;

	/** e.g. 2001 January 15 */
	const YMD = 3;

	/** e.g. 2001-01-15 */
	const ISO = 4;

	/** The highest ID that is a valid user preference */
	const LASTPREF = 4;

	/** e.g. 2001, 15 January */
	const YDM = 5;

	/** e.g. 15 January */
	const DM = 6;

	/** e.g. January 15 */
	const MD = 7;

	/** The highest ID that is a valid target format */
	const LAST = 7;

	/**
	 * Build the month lookup tables, the source regexes, the target formats
	 * and the conversion rules for the given language.
	 *
	 * @param Language $lang In which language to format the date
	 */
	public function __construct( Language $lang ) {
		$monthRegexParts = [];
		for ( $i = 1; $i <= 12; $i++ ) {
			$monthName = $lang->getMonthName( $i );
			$monthAbbrev = $lang->getMonthAbbreviation( $i );
			$this->monthNames[$i] = $monthName;
			// Both the full name and the abbreviation are recognised in input
			$monthRegexParts[] = preg_quote( $monthName, '/' );
			$monthRegexParts[] = preg_quote( $monthAbbrev, '/' );
			$this->xMonths[mb_strtolower( $monthName )] = $i;
			$this->xMonths[mb_strtolower( $monthAbbrev )] = $i;
		}

		// Partial regular expressions
		$monthNames = implode( '|', $monthRegexParts );
		$dm = "(?<day>\d{1,2})[ _](?<monthName>{$monthNames})";
		$md = "(?<monthName>{$monthNames})[ _](?<day>\d{1,2})";
		$y = '(?<year>\d{1,4}([ _]BC|))';
		$iso = '(?<isoYear>-?\d{4})-(?<isoMonth>\d{2})-(?<isoDay>\d{2})';

		// All regexes are anchored, so only a whole-string date is recognised
		$this->regexes = [
			self::DMY => "/^{$dm}(?: *, *| +){$y}$/iu",
			self::YDM => "/^{$y}(?: *, *| +){$dm}$/iu",
			self::MDY => "/^{$md}(?: *, *| +){$y}$/iu",
			self::YMD => "/^{$y}(?: *, *| +){$md}$/iu",
			self::DM => "/^{$dm}$/iu",
			self::MD => "/^{$md}$/iu",
			self::ISO => "/^{$iso}$/iu",
		];

		// Target date formats
		$this->targetFormats = [
			self::DMY => 'j F Y',
			self::YDM => 'Y, j F',
			self::MDY => 'F j, Y',
			self::YMD => 'Y F j',
			self::DM => 'j F',
			self::MD => 'F j',
			self::ISO => 'y-m-d',
		];

		// Rules
		// pref source target
		$this->rules[self::DMY][self::MD] = self::DM;
		$this->rules[self::ALL][self::MD] = self::MD;
		$this->rules[self::MDY][self::DM] = self::MD;
		$this->rules[self::ALL][self::DM] = self::DM;
		$this->rules[self::NONE][self::ISO] = self::ISO;

		$this->preferenceIDs = [
			'default' => self::NONE,
			'dmy' => self::DMY,
			'mdy' => self::MDY,
			'ymd' => self::YMD,
			'ISO 8601' => self::ISO,
		];
	}

	/**
	 * Get a DateFormatter object
	 *
	 * @deprecated since 1.33 use MediaWikiServices::getDateFormatterFactory()
	 *
	 * @param Language|null $lang In which language to format the date
	 *     Defaults to the site content language
	 * @return DateFormatter
	 */
	public static function getInstance( Language $lang = null ) {
		$lang = $lang ?? MediaWikiServices::getInstance()->getContentLanguage();
		return MediaWikiServices::getInstance()->getDateFormatterFactory()->get( $lang );
	}

	/**
	 * Reformat a date according to a preference. Text that is not recognised
	 * as a date by any of the source regexes is returned unchanged.
	 *
	 * @param string $preference User preference, must be one of "default",
	 *     "dmy", "mdy", "ymd" or "ISO 8601". Unknown values are treated as "default".
	 * @param string $text Text to reformat
	 * @param array $options Ignored. Since 1.33, 'match-whole' is implied, and
	 *     'linked' has been removed.
	 *
	 * @return string
	 */
	public function reformat( $preference, $text, $options = [] ) {
		$preference = $this->preferenceIDs[$preference] ?? self::NONE;

		for ( $source = 1; $source <= self::LAST; $source++ ) {
			if ( isset( $this->rules[$preference][$source] ) ) {
				# Specific rules
				$target = $this->rules[$preference][$source];
			} elseif ( isset( $this->rules[self::ALL][$source] ) ) {
				# General rules
				$target = $this->rules[self::ALL][$source];
			} elseif ( $preference ) {
				# User preference
				$target = $preference;
			} else {
				# Default
				$target = $source;
			}

			$result = preg_replace_callback(
				$this->regexes[$source],
				function ( $match ) use ( $target ) {
					return $this->formatMatch( $match, $target );
				},
				$text
			);

			// preg_replace_callback() returns null on a PCRE error (for example
			// when $text is not valid UTF-8 and the regex uses the /u flag).
			// Keep the text as it was instead of discarding it.
			if ( $result !== null ) {
				$text = $result;
			}
		}
		return $text;
	}

	/**
	 * Render one regex match in the given target format, wrapped in a
	 * <span class="mw-formatted-date"> whose title is the ISO form of the date.
	 *
	 * @param array $match Match array from one of the source regexes, with
	 *     named groups (day/monthName/year or isoDay/isoMonth/isoYear)
	 * @param int $target ID of the target format (key of $this->targetFormats)
	 * @return string The formatted date, or the unmodified matched text if the
	 *     month could not be resolved
	 */
	private function formatMatch( array $match, $target ) {
		$format = $this->targetFormats[$target];

		// Pre-generate y/Y stuff because we need the year for the <span> title.
		if ( !isset( $match['isoYear'] ) && isset( $match['year'] ) ) {
			$match['isoYear'] = $this->makeIsoYear( $match['year'] );
		}
		if ( !isset( $match['year'] ) && isset( $match['isoYear'] ) ) {
			$match['year'] = $this->makeNormalYear( $match['isoYear'] );
		}

		if ( !isset( $match['isoMonth'] ) ) {
			$m = $this->makeIsoMonth( $match['monthName'] );
			if ( $m === false ) {
				// Fail
				return $match[0];
			}
			$match['isoMonth'] = $m;
		}

		if ( !isset( $match['isoDay'] ) ) {
			$match['isoDay'] = sprintf( '%02d', $match['day'] );
		}

		$text = '';
		$formatLength = strlen( $format );
		for ( $p = 0; $p < $formatLength; $p++ ) {
			$char = $format[$p];
			switch ( $char ) {
				case 'd': // ISO day of month
					$text .= $match['isoDay'];
					break;
				case 'm': // ISO month
					$text .= $match['isoMonth'];
					break;
				case 'y': // ISO year
					$text .= $match['isoYear'];
					break;
				case 'j': // ordinary day of month
					if ( !isset( $match['day'] ) ) {
						$text .= intval( $match['isoDay'] );
					} else {
						$text .= $match['day'];
					}
					break;
				case 'F': // long month
					$m = intval( $match['isoMonth'] );
					if ( $m > 12 || $m < 1 ) {
						// Fail
						return $match[0];
					}
					$text .= $this->monthNames[$m];
					break;
				case 'Y': // ordinary (optional BC) year
					$text .= $match['year'];
					break;
				default:
					// Any other character in the format string is literal
					$text .= $char;
			}
		}

		// The year is optional (DM and MD sources have none)
		$isoBits = [];
		if ( isset( $match['isoYear'] ) ) {
			$isoBits[] = $match['isoYear'];
		}
		$isoBits[] = $match['isoMonth'];
		$isoBits[] = $match['isoDay'];
		$isoDate = implode( '-', $isoBits );

		// Output is not strictly HTML (it's wikitext), but <span> is whitelisted.
		return Html::rawElement( 'span',
			[ 'class' => 'mw-formatted-date', 'title' => $isoDate ], $text );
	}

	/**
	 * Makes an ISO month, e.g. 02, from a month name
	 * @param string $monthName Month name or abbreviation (case-insensitive)
	 * @return string|false Two-digit ISO month, or false if the input was invalid
	 */
	private function makeIsoMonth( $monthName ) {
		$isoMonth = $this->xMonths[mb_strtolower( $monthName )] ?? false;
		if ( $isoMonth === false ) {
			return false;
		}
		return sprintf( '%02d', $isoMonth );
	}

	/**
	 * Make an ISO year from a year name, for instance: '-1199' from '1200 BC'
	 * @param string $year Year name
	 * @return string ISO year name
	 */
	private function makeIsoYear( $year ) {
		// Assumes the year is in a nice format, as enforced by the regex.
		// The regexes are case-insensitive, so the suffix may be "BC", "bc", etc.
		if ( strcasecmp( substr( $year, -2 ), 'BC' ) === 0 ) {
			// Strip the separator (space or underscore) plus the two-letter suffix
			$num = intval( substr( $year, 0, -3 ) ) - 1;
			// PHP bug note: sprintf( "%04d", -1 ) fails poorly
			$text = sprintf( '-%04d', $num );
		} else {
			$text = sprintf( '%04d', $year );
		}
		return $text;
	}

	/**
	 * Make a year from an ISO year, for instance: '400 BC' from '-0399'.
	 * @param string $iso ISO year
	 * @return int|string int representing year number in case of AD dates, or string containing
	 *     year number and 'BC' at the end otherwise.
	 */
	private function makeNormalYear( $iso ) {
		if ( $iso[0] == '-' ) {
			$text = ( intval( substr( $iso, 1 ) ) + 1 ) . ' BC';
		} else {
			$text = intval( $iso );
		}
		return $text;
	}
}