3 * @defgroup Language Language
9 if ( !defined( 'MEDIAWIKI' ) ) {
10 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
# Load the language name table from Names.php, which sits next to this file.
global $wgLanguageNames;
require_once dirname( __FILE__ ) . '/Names.php';

/**
 * Both encodings are always UTF-8; the variables exist only for
 * backwards compatibility.
 */
global $wgInputEncoding, $wgOutputEncoding;
$wgInputEncoding = 'UTF-8';
$wgOutputEncoding = $wgInputEncoding;

# Switch the mbstring internal encoding to UTF-8 when mb_strtoupper() exists.
if ( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding( 'UTF-8' );
}
31 * a fake language converter
/**
 * Store the language object whose code the variant getters report.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as the constructor; without this, `new FakeConverter( $x )`
 * would leave $this->mLang unset there.
 *
 * @param $langobj object providing getCode()
 */
function __construct( $langobj ) {
	$this->mLang = $langobj;
}

/**
 * PHP 4 style constructor name, kept so explicit calls to it keep working.
 *
 * @param $langobj object providing getCode()
 */
function FakeConverter( $langobj ) {
	$this->__construct( $langobj );
}

/** Return $text unchanged. */
function autoConvertToAllVariants( $text ) {
	return $text;
}

/** Return $t unchanged. */
function convert( $t ) {
	return $t;
}

/** Return the prefixed text of the title object $t. */
function convertTitle( $t ) {
	return $t->getPrefixedText();
}

/** Return a one-element array holding the language's own code. */
function getVariants() {
	return array( $this->mLang->getCode() );
}

/** Return the language's own code. */
function getPreferredVariant() {
	return $this->mLang->getCode();
}

/** Always false. */
function getConvRuleTitle() {
	return false;
}

/** Does nothing; neither reference argument is modified. */
function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
}

/** Always the empty string. */
function getExtraHashOptions() {
	return '';
}

/** Always the empty string. */
function getParsedTitle() {
	return '';
}

/** Return $text unchanged; $noParse is ignored. */
function markNoConversion( $text, $noParse = false ) {
	return $text;
}

/** Return $key unchanged. */
function convertCategoryKey( $key ) {
	return $key;
}

/** Return $text in an array keyed by the language's own code. */
function convertLinkToAllVariants( $text ) {
	return array( $this->mLang->getCode() => $text );
}

/** Return $text unchanged. */
function armourMath( $text ) {
	return $text;
}
54 * Internationalisation code
58 var $mConverter, $mVariants, $mCode, $mLoaded = false;
59 var $mMagicExtensions = array(), $mMagicHookDone = false;
61 var $mNamespaceIds, $namespaceNames, $namespaceAliases;
62 var $dateFormatStrings = array();
63 var $mExtendedSpecialPageAliases;
66 * ReplacementArray object caches
68 var $transformData = array();
70 static public $dataCache;
71 static public $mLangObjCache = array();
73 static public $mWeekdayMsgs = array(
74 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
78 static public $mWeekdayAbbrevMsgs = array(
79 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
82 static public $mMonthMsgs = array(
83 'january', 'february', 'march', 'april', 'may_long', 'june',
84 'july', 'august', 'september', 'october', 'november',
87 static public $mMonthGenMsgs = array(
88 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
89 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
92 static public $mMonthAbbrevMsgs = array(
93 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
94 'sep', 'oct', 'nov', 'dec'
97 static public $mIranianCalendarMonthMsgs = array(
98 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
99 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
100 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
101 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
104 static public $mHebrewCalendarMonthMsgs = array(
105 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
106 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
107 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
108 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
109 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
112 static public $mHebrewCalendarMonthGenMsgs = array(
113 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
114 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
115 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
116 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
117 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
120 static public $mHijriCalendarMonthMsgs = array(
121 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
122 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
123 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
124 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
/**
 * Get a cached language object for a given language code.
 *
 * Objects are kept in self::$mLangObjCache keyed by code. When the cache
 * already holds more than 10 entries it is emptied before the new object
 * is stored, so it cannot grow without bound.
 *
 * @param $code String: language code
 * @return the value self::newFromCode() produced for $code
 */
static function factory( $code ) {
	if ( isset( self::$mLangObjCache[$code] ) ) {
		return self::$mLangObjCache[$code];
	}

	if ( count( self::$mLangObjCache ) > 10 ) {
		// Throw the whole cache away rather than letting it grow.
		self::$mLangObjCache = array();
	}
	self::$mLangObjCache[$code] = self::newFromCode( $code );

	return self::$mLangObjCache[$code];
}
142 * Create a language object for a given language code
144 protected static function newFromCode( $code ) {
146 static $recursionLevel = 0;
147 if ( $code == 'en' ) {
150 $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
151 // Preload base classes to work around APC/PHP5 bug
152 if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
153 include_once( "$IP/languages/classes/$class.deps.php" );
155 if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
156 include_once( "$IP/languages/classes/$class.php" );
160 if ( $recursionLevel > 5 ) {
161 throw new MWException( "Language fallback loop detected when creating class $class\n" );
164 if ( !class_exists( $class ) ) {
165 $fallback = Language
::getFallbackFor( $code );
167 $lang = Language
::newFromCode( $fallback );
169 $lang->setCode( $code );
/**
 * Get the LocalisationCache instance.
 *
 * On first use the object is built from $wgLocalisationCacheConf: its
 * 'class' entry names the class to instantiate and the whole array is
 * passed to the constructor. The result is kept in self::$dataCache and
 * returned by every later call.
 */
public static function getLocalisationCache() {
	if ( self::$dataCache === null ) {
		global $wgLocalisationCacheConf;
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}

	return self::$dataCache;
}
188 function __construct() {
189 $this->mConverter
= new FakeConverter( $this );
190 // Set the code to the name of the descendant
191 if ( get_class( $this ) == 'Language' ) {
194 $this->mCode
= str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
196 self
::getLocalisationCache();
/**
 * Reduce memory usage by unsetting every property of this object.
 */
function __destruct() {
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 */
function initContLang() {
	// Intentionally empty in this class.
}
/**
 * Forward to User::getDefaultOptions() after reporting the deprecated call.
 *
 * @deprecated Use User::getDefaultOptions()
 */
function getDefaultUserOptions() {
	wfDeprecated( __METHOD__ );
	$defaults = User::getDefaultOptions();
	return $defaults;
}
223 function getFallbackLanguageCode() {
224 if ( $this->mCode
=== 'en' ) {
227 return self
::$dataCache->getItem( $this->mCode
, 'fallback' );
/**
 * Exports $wgBookstoreListEn
 *
 * @return the 'bookstoreList' localisation item for this language's code
 */
function getBookstoreList() {
	$bookstores = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
	return $bookstores;
}
/**
 * Get the namespace names of this language, indexed by namespace ID.
 *
 * The list is built once and cached in $this->namespaceNames:
 *  - start from the 'namespaceNames' localisation item,
 *  - fall back to the canonical name for IDs the localisation lacks,
 *  - set the project namespace to $wgMetaNamespace and the project talk
 *    namespace to $wgMetaNamespaceTalk, or to the localised talk name run
 *    through fixVariableInNamespace() when that global is empty,
 *  - drop IDs that are not in MWNamespace::getCanonicalNamespaces(),
 *  - sort by namespace ID.
 *
 * @return Array: namespace ID => name
 */
function getNamespaces() {
	if ( is_null( $this->namespaceNames ) ) {
		global $wgMetaNamespace, $wgMetaNamespaceTalk;

		$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
		$validNamespaces = MWNamespace::getCanonicalNamespaces();

		# Array union keeps the LEFT operand's value for keys present in both,
		# so the localised names must come first; the canonical names only
		# fill in IDs the localisation does not define.
		# NOTE(review): if getCanonicalNamespaces() also carries site-configured
		# names that must beat the localisation, they need to be prepended here.
		$this->namespaceNames = $this->namespaceNames + $validNamespaces;

		$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
		if ( $wgMetaNamespaceTalk ) {
			$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
		} else {
			$talk = $this->namespaceNames[NS_PROJECT_TALK];
			$this->namespaceNames[NS_PROJECT_TALK] =
				$this->fixVariableInNamespace( $talk );
		}

		# Sometimes a language will be localised but not actually exist on this wiki.
		foreach ( $this->namespaceNames as $key => $text ) {
			if ( !isset( $validNamespaces[$key] ) ) {
				unset( $this->namespaceNames[$key] );
			}
		}

		# The above mixing may leave namespaces out of canonical order.
		# Re-order by namespace ID number...
		ksort( $this->namespaceNames );
	}
	return $this->namespaceNames;
}
/**
 * A convenience function that returns the same thing as
 * getNamespaces() except with the array values changed to ' '
 * where it found '_', useful for producing output to be displayed
 * e.g. in <select> forms.
 *
 * @return Array: namespace ID => name with underscores replaced by spaces
 */
function getFormattedNamespaces() {
	$formatted = $this->getNamespaces();
	foreach ( $formatted as $index => $name ) {
		$formatted[$index] = strtr( $name, '_', ' ' );
	}
	return $formatted;
}
/**
 * Get a namespace value by key
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param $index Int: the array key of the namespace to return
 * @return mixed, string if the namespace value exists, otherwise false
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}
	return false;
}
/**
 * A convenience function that returns the same thing as
 * getNsText() except with '_' changed to ' '.
 *
 * The getNsText() result is handed to strtr() as-is, including the false
 * it returns for an unknown index.
 *
 * @param $index Int: namespace ID
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}
	return false;
}
331 function getNamespaceAliases() {
332 if ( is_null( $this->namespaceAliases
) ) {
333 $aliases = self
::$dataCache->getItem( $this->mCode
, 'namespaceAliases' );
337 foreach ( $aliases as $name => $index ) {
338 if ( $index === NS_PROJECT_TALK
) {
339 unset( $aliases[$name] );
340 $name = $this->fixVariableInNamespace( $name );
341 $aliases[$name] = $index;
345 $this->namespaceAliases
= $aliases;
347 return $this->namespaceAliases
;
/**
 * Get the map of lower-cased namespace names and aliases to namespace IDs.
 *
 * Built once and cached in $this->mNamespaceIds. Entries are written in
 * this order, later ones overwriting earlier ones with the same key:
 * getNamespaces(), getNamespaceAliases(), then $wgNamespaceAliases.
 *
 * @return Array: lower-cased name => namespace ID
 */
function getNamespaceIds() {
	if ( $this->mNamespaceIds !== null ) {
		return $this->mNamespaceIds;
	}

	global $wgNamespaceAliases;
	# Names and aliases go into one hashtable. Should this prove too slow it
	# could be done before caching, but at pre-cache time the class-specific
	# fixup above has not happened yet.
	$this->mNamespaceIds = array();
	foreach ( $this->getNamespaces() as $id => $localName ) {
		$this->mNamespaceIds[$this->lc( $localName )] = $id;
	}
	foreach ( $this->getNamespaceAliases() as $alias => $id ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $id;
	}
	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $id ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $id;
		}
	}

	return $this->mNamespaceIds;
}
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param $text String
 * @return mixed An integer if $text is a valid value otherwise false
 */
function getNsIndex( $text ) {
	$lctext = $this->lc( $text );

	# A canonical match wins outright.
	$canonicalIndex = MWNamespace::getCanonicalIndex( $lctext );
	if ( $canonicalIndex !== null ) {
		return $canonicalIndex;
	}

	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$lctext] ) ) {
		return $idsByName[$lctext];
	}
	return false;
}
/**
 * Short names for language variants used for language conversion links.
 *
 * @param $code String
 * @return the "variantname-<code>" message as returned by getMessageFromDB()
 */
function getVariantname( $code ) {
	$messageKey = "variantname-$code";
	return $this->getMessageFromDB( $messageKey );
}
/**
 * Build "<Special namespace name>:<page name>" for a special page.
 *
 * When getSpecialPageAliases() has a first alias for $name, that alias is
 * used as the page name; otherwise $name is used unchanged.
 *
 * @param $name String: special page name
 * @return String
 */
function specialPage( $name ) {
	$aliases = $this->getSpecialPageAliases();
	$pageName = isset( $aliases[$name][0] ) ? $aliases[$name][0] : $name;
	return $this->getNsText( NS_SPECIAL ) . ':' . $pageName;
}
/**
 * Get the five quickbar setting labels.
 *
 * @return Array of getMessage() results, in the order none, fixedleft,
 *   fixedright, floatingleft, floatingright
 */
function getQuickbarSettings() {
	$messageKeys = array(
		'qbsettings-none',
		'qbsettings-fixedleft',
		'qbsettings-fixedright',
		'qbsettings-floatingleft',
		'qbsettings-floatingright'
	);
	$labels = array();
	foreach ( $messageKeys as $messageKey ) {
		$labels[] = $this->getMessage( $messageKey );
	}
	return $labels;
}
/**
 * @return the 'mathNames' localisation item for this language's code
 */
function getMathNames() {
	$mathNames = self::$dataCache->getItem( $this->mCode, 'mathNames' );
	return $mathNames;
}
/**
 * @return the 'datePreferences' localisation item for this language's code
 */
function getDatePreferences() {
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
/**
 * @return the 'dateFormats' localisation item for this language's code
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
/**
 * Get the default date format of this language.
 *
 * The localisation value 'dmy or mdy' is a placeholder: it resolves to
 * 'mdy' when $wgAmericanDates is truthy and to 'dmy' otherwise. Any other
 * value is returned unchanged.
 */
function getDefaultDateFormat() {
	$df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $df !== 'dmy or mdy' ) {
		return $df;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
/**
 * @return the 'datePreferenceMigrationMap' localisation item for this language's code
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
/**
 * @param $image key looked up inside the 'imageFiles' localisation item
 * @return the matching sub-item for this language's code
 */
function getImageFile( $image ) {
	$file = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $file;
}
/**
 * @return the 'defaultUserOptionOverrides' localisation item for this language's code
 */
function getDefaultUserOptionOverrides() {
	$overrides = self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
	return $overrides;
}
/**
 * @return the 'extraUserToggles' localisation item for this language's code
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return $toggles;
}
/**
 * @param $tog toggle name; the message looked up is "tog-<name>"
 * @return the message as returned by getMessageFromDB()
 */
function getUserToggle( $tog ) {
	$messageKey = "tog-$tog";
	return $this->getMessageFromDB( $messageKey );
}
461 * Get language names, indexed by code.
462 * If $customisedOnly is true, only returns codes with a messages file
464 public static function getLanguageNames( $customisedOnly = false ) {
465 global $wgLanguageNames, $wgExtraLanguageNames;
466 $allNames = $wgExtraLanguageNames +
$wgLanguageNames;
467 if ( !$customisedOnly ) {
473 $dir = opendir( "$IP/languages/messages" );
474 while ( false !== ( $file = readdir( $dir ) ) ) {
475 $code = self
::getCodeFromFileName( $file, 'Messages' );
476 if ( $code && isset( $allNames[$code] ) ) {
477 $names[$code] = $allNames[$code];
/**
 * Get a message from the MediaWiki namespace.
 *
 * Delegates to wfMsgExt() with the 'parsemag' option and this object as
 * the 'language' option.
 *
 * @param $msg String: message name
 */
function getMessageFromDB( $msg ) {
	$options = array( 'parsemag', 'language' => $this );
	return wfMsgExt( $msg, $options );
}
494 function getLanguageName( $code ) {
495 $names = self
::getLanguageNames();
496 if ( !array_key_exists( $code, $names ) ) {
499 return $names[$code];
/**
 * @param $key 1-based index into self::$mMonthMsgs (1 selects 'january')
 * @return the message as returned by getMessageFromDB()
 */
function getMonthName( $key ) {
	$messageKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mMonthGenMsgs (1 selects 'january-gen')
 * @return the message as returned by getMessageFromDB()
 */
function getMonthNameGen( $key ) {
	$messageKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mMonthAbbrevMsgs (1 selects 'jan')
 * @return the message as returned by getMessageFromDB()
 */
function getMonthAbbreviation( $key ) {
	$messageKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mWeekdayMsgs (1 selects 'sunday')
 * @return the message as returned by getMessageFromDB()
 */
function getWeekdayName( $key ) {
	$messageKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mWeekdayAbbrevMsgs (1 selects 'sun')
 * @return the message as returned by getMessageFromDB()
 */
function getWeekdayAbbreviation( $key ) {
	$messageKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mIranianCalendarMonthMsgs
 * @return the message as returned by getMessageFromDB()
 */
function getIranianCalendarMonthName( $key ) {
	$messageKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mHebrewCalendarMonthMsgs; 13 and 14
 *   select the 'hebrew-calendar-m6a' and 'hebrew-calendar-m6b' entries
 * @return the message as returned by getMessageFromDB()
 */
function getHebrewCalendarMonthName( $key ) {
	$messageKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mHebrewCalendarMonthGenMsgs; 13 and 14
 *   select the 'hebrew-calendar-m6a-gen' and 'hebrew-calendar-m6b-gen' entries
 * @return the message as returned by getMessageFromDB()
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$messageKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
/**
 * @param $key 1-based index into self::$mHijriCalendarMonthMsgs
 * @return the message as returned by getMessageFromDB()
 */
function getHijriCalendarMonthName( $key ) {
	$messageKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $messageKey );
}
539 * Used by date() and time() to adjust the time output.
541 * @param $ts Int the time in date('YmdHis') format
542 * @param $tz Mixed: adjust the time by this amount (default false, meaning
543 * the user's timecorrection setting is used)
546 function userAdjust( $ts, $tz = false ) {
547 global $wgUser, $wgLocalTZoffset;
549 if ( $tz === false ) {
550 $tz = $wgUser->getOption( 'timecorrection' );
553 $data = explode( '|', $tz, 3 );
555 if ( $data[0] == 'ZoneInfo' ) {
556 if ( function_exists( 'timezone_open' ) && @timezone_open
( $data[2] ) !== false ) {
557 $date = date_create( $ts, timezone_open( 'UTC' ) );
558 date_timezone_set( $date, timezone_open( $data[2] ) );
559 $date = date_format( $date, 'YmdHis' );
562 # Unrecognized timezone, default to 'Offset' with the stored offset.
567 if ( $data[0] == 'System' ||
$tz == '' ) {
568 # Global offset in minutes.
569 if ( isset( $wgLocalTZoffset ) ) {
570 $minDiff = $wgLocalTZoffset;
572 } else if ( $data[0] == 'Offset' ) {
573 $minDiff = intval( $data[1] );
575 $data = explode( ':', $tz );
576 if ( count( $data ) == 2 ) {
577 $data[0] = intval( $data[0] );
578 $data[1] = intval( $data[1] );
579 $minDiff = abs( $data[0] ) * 60 +
$data[1];
580 if ( $data[0] < 0 ) {
581 $minDiff = -$minDiff;
584 $minDiff = intval( $data[0] ) * 60;
588 # No difference ? Return time unchanged
589 if ( 0 == $minDiff ) {
593 wfSuppressWarnings(); // E_STRICT system time bitching
594 # Generate an adjusted date; take advantage of the fact that mktime
595 # will normalize out-of-range values so we don't have to split $minDiff
596 # into hours and minutes.
598 (int)substr( $ts, 8, 2 ) ), # Hours
599 (int)substr( $ts, 10, 2 ) +
$minDiff, # Minutes
600 (int)substr( $ts, 12, 2 ), # Seconds
601 (int)substr( $ts, 4, 2 ), # Month
602 (int)substr( $ts, 6, 2 ), # Day
603 (int)substr( $ts, 0, 4 ) ); # Year
605 $date = date( 'YmdHis', $t );
612 * This is a workalike of PHP's date() function, but with better
613 * internationalisation, a reduced set of format characters, and a better
616 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
617 * PHP manual for definitions. "o" format character is supported since
618 * PHP 5.1.0, previous versions return literal o.
619 * There are a number of extensions, which start with "x":
621 * xn Do not translate digits of the next numeric format character
622 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
623 * xr Use roman numerals for the next numeric format character
624 * xh Use hebrew numerals for the next numeric format character
626 * xg Genitive month name
628 * xij j (day number) in Iranian calendar
629 * xiF F (month name) in Iranian calendar
630 * xin n (month number) in Iranian calendar
631 * xiY Y (full year) in Iranian calendar
633 * xjj j (day number) in Hebrew calendar
634 * xjF F (month name) in Hebrew calendar
635 * xjt t (days in month) in Hebrew calendar
636 * xjx xg (genitive month name) in Hebrew calendar
637 * xjn n (month number) in Hebrew calendar
638 * xjY Y (full year) in Hebrew calendar
640 * xmj j (day number) in Hijri calendar
641 * xmF F (month name) in Hijri calendar
642 * xmn n (month number) in Hijri calendar
643 * xmY Y (full year) in Hijri calendar
645 * xkY Y (full year) in Thai solar calendar. Months and days are
646 * identical to the Gregorian calendar
647 * xoY Y (full year) in Minguo calendar or Juche year.
648 * Months and days are identical to the
650 * xtY Y (full year) in Japanese nengo. Months and days are
651 * identical to the Gregorian calendar
653 * Characters enclosed in double quotes will be considered literal (with
654 * the quotes themselves removed). Unmatched quotes will be considered
655 * literal quotes. Example:
657 * "The month is" F => The month is January
660 * Backslash escaping is also supported.
662 * Input timestamp is assumed to be pre-normalized to the desired local
665 * @param $format String
666 * @param $ts String: 14-character timestamp
669 * @todo emulation of "o" format character for PHP pre 5.1.0
670 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
672 function sprintfDate( $format, $ts ) {
685 for ( $p = 0; $p < strlen( $format ); $p++
) {
688 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
689 $code .= $format[++
$p];
692 if ( ( $code === 'xi' ||
$code == 'xj' ||
$code == 'xk' ||
$code == 'xm' ||
$code == 'xo' ||
$code == 'xt' ) && $p < strlen( $format ) - 1 ) {
693 $code .= $format[++
$p];
704 $rawToggle = !$rawToggle;
713 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
716 if ( !$hebrew ) $hebrew = self
::tsToHebrew( $ts );
717 $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
720 $num = substr( $ts, 6, 2 );
723 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
724 $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) +
1 );
727 $num = intval( substr( $ts, 6, 2 ) );
731 $iranian = self
::tsToIranian( $ts );
737 $hijri = self
::tsToHijri( $ts );
743 $hebrew = self
::tsToHebrew( $ts );
749 $unix = wfTimestamp( TS_UNIX
, $ts );
751 $s .= $this->getWeekdayName( gmdate( 'w', $unix ) +
1 );
755 $unix = wfTimestamp( TS_UNIX
, $ts );
757 $w = gmdate( 'w', $unix );
762 $unix = wfTimestamp( TS_UNIX
, $ts );
764 $num = gmdate( 'w', $unix );
768 $unix = wfTimestamp( TS_UNIX
, $ts );
770 $num = gmdate( 'z', $unix );
774 $unix = wfTimestamp( TS_UNIX
, $ts );
776 $num = gmdate( 'W', $unix );
779 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
783 $iranian = self
::tsToIranian( $ts );
785 $s .= $this->getIranianCalendarMonthName( $iranian[1] );
789 $hijri = self
::tsToHijri( $ts );
791 $s .= $this->getHijriCalendarMonthName( $hijri[1] );
795 $hebrew = self
::tsToHebrew( $ts );
797 $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
800 $num = substr( $ts, 4, 2 );
803 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
806 $num = intval( substr( $ts, 4, 2 ) );
810 $iranian = self
::tsToIranian( $ts );
816 $hijri = self
::tsToHijri ( $ts );
822 $hebrew = self
::tsToHebrew( $ts );
828 $unix = wfTimestamp( TS_UNIX
, $ts );
830 $num = gmdate( 't', $unix );
834 $hebrew = self
::tsToHebrew( $ts );
840 $unix = wfTimestamp( TS_UNIX
, $ts );
842 $num = gmdate( 'L', $unix );
844 # 'o' is supported since PHP 5.1.0
845 # return literal if not supported
846 # TODO: emulation for pre 5.1.0 versions
849 $unix = wfTimestamp( TS_UNIX
, $ts );
851 if ( version_compare( PHP_VERSION
, '5.1.0' ) === 1 ) {
852 $num = date( 'o', $unix );
858 $num = substr( $ts, 0, 4 );
862 $iranian = self
::tsToIranian( $ts );
868 $hijri = self
::tsToHijri( $ts );
874 $hebrew = self
::tsToHebrew( $ts );
880 $thai = self
::tsToYear( $ts, 'thai' );
886 $minguo = self
::tsToYear( $ts, 'minguo' );
892 $tenno = self
::tsToYear( $ts, 'tenno' );
897 $num = substr( $ts, 2, 2 );
900 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
903 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
906 $h = substr( $ts, 8, 2 );
907 $num = $h %
12 ?
$h %
12 : 12;
910 $num = intval( substr( $ts, 8, 2 ) );
913 $h = substr( $ts, 8, 2 );
914 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
917 $num = substr( $ts, 8, 2 );
920 $num = substr( $ts, 10, 2 );
923 $num = substr( $ts, 12, 2 );
927 $unix = wfTimestamp( TS_UNIX
, $ts );
929 $s .= gmdate( 'c', $unix );
933 $unix = wfTimestamp( TS_UNIX
, $ts );
935 $s .= gmdate( 'r', $unix );
939 $unix = wfTimestamp( TS_UNIX
, $ts );
945 if ( $p < strlen( $format ) - 1 ) {
953 if ( $p < strlen( $format ) - 1 ) {
954 $endQuote = strpos( $format, '"', $p +
1 );
955 if ( $endQuote === false ) {
956 # No terminating quote, assume literal "
959 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
963 # Quote at end of string, assume literal "
970 if ( $num !== false ) {
971 if ( $rawToggle ||
$raw ) {
974 } elseif ( $roman ) {
975 $s .= self
::romanNumeral( $num );
977 } elseif ( $hebrewNum ) {
978 $s .= self
::hebrewNumeral( $num );
981 $s .= $this->formatNum( $num, true );
989 private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
990 private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
992 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
993 * Gregorian dates to Iranian dates. Originally written in C, it
994 * is released under the terms of GNU Lesser General Public
995 * License. Conversion to PHP was performed by Niklas Laxström.
997 * Link: http://www.farsiweb.info/jalali/jalali.c
999 private static function tsToIranian( $ts ) {
1000 $gy = substr( $ts, 0, 4 ) -1600;
1001 $gm = substr( $ts, 4, 2 ) -1;
1002 $gd = substr( $ts, 6, 2 ) -1;
1004 # Days passed from the beginning (including leap years)
1006 +
floor( ( $gy +
3 ) / 4 )
1007 - floor( ( $gy +
99 ) / 100 )
1008 +
floor( ( $gy +
399 ) / 400 );
1011 // Add days of the past months of this year
1012 for ( $i = 0; $i < $gm; $i++
) {
1013 $gDayNo +
= self
::$GREG_DAYS[$i];
1017 if ( $gm > 1 && ( ( $gy %
4 === 0 && $gy %
100 !== 0 ||
( $gy %
400 == 0 ) ) ) ) {
1021 // Days passed in current month
1024 $jDayNo = $gDayNo - 79;
1026 $jNp = floor( $jDayNo / 12053 );
1029 $jy = 979 +
33 * $jNp +
4 * floor( $jDayNo / 1461 );
1032 if ( $jDayNo >= 366 ) {
1033 $jy +
= floor( ( $jDayNo - 1 ) / 365 );
1034 $jDayNo = floor( ( $jDayNo - 1 ) %
365 );
1037 for ( $i = 0; $i < 11 && $jDayNo >= self
::$IRANIAN_DAYS[$i]; $i++
) {
1038 $jDayNo -= self
::$IRANIAN_DAYS[$i];
1044 return array( $jy, $jm, $jd );
1048 * Converting Gregorian dates to Hijri dates.
1050 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1052 * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1054 private static function tsToHijri( $ts ) {
1055 $year = substr( $ts, 0, 4 );
1056 $month = substr( $ts, 4, 2 );
1057 $day = substr( $ts, 6, 2 );
1065 ( $zy > 1582 ) ||
( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1066 ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1069 $zjd = (int)( ( 1461 * ( $zy +
4800 +
(int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1070 (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1071 (int)( ( 3 * (int)( ( ( $zy +
4900 +
(int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1074 $zjd = 367 * $zy - (int)( ( 7 * ( $zy +
5001 +
(int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1075 (int)( ( 275 * $zm ) / 9 ) +
$zd +
1729777;
1078 $zl = $zjd -1948440 +
10632;
1079 $zn = (int)( ( $zl - 1 ) / 10631 );
1080 $zl = $zl - 10631 * $zn +
354;
1081 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) +
( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1082 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) +
29;
1083 $zm = (int)( ( 24 * $zl ) / 709 );
1084 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1085 $zy = 30 * $zn +
$zj - 30;
1087 return array( $zy, $zm, $zd );
1091 * Converting Gregorian dates to Hebrew dates.
1093 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1094 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1095 * to translate the relevant functions into PHP and release them under
1098 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1099 * and Adar II is 14. In a non-leap year, Adar is 6.
1101 private static function tsToHebrew( $ts ) {
1103 $year = substr( $ts, 0, 4 );
1104 $month = substr( $ts, 4, 2 );
1105 $day = substr( $ts, 6, 2 );
1107 # Calculate Hebrew year
1108 $hebrewYear = $year +
3760;
1110 # Month number when September = 1, August = 12
1112 if ( $month > 12 ) {
1119 # Calculate day of year from 1 September
1121 for ( $i = 1; $i < $month; $i++
) {
1125 # Check if the year is leap
1126 if ( $year %
400 == 0 ||
( $year %
4 == 0 && $year %
100 > 0 ) ) {
1129 } elseif ( $i == 8 ||
$i == 10 ||
$i == 1 ||
$i == 3 ) {
1136 # Calculate the start of the Hebrew year
1137 $start = self
::hebrewYearStart( $hebrewYear );
1139 # Calculate next year's start
1140 if ( $dayOfYear <= $start ) {
1141 # Day is before the start of the year - it is the previous year
1143 $nextStart = $start;
1147 # Add days since previous year's 1 September
1149 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1153 # Start of the new (previous) year
1154 $start = self
::hebrewYearStart( $hebrewYear );
1157 $nextStart = self
::hebrewYearStart( $hebrewYear +
1 );
1160 # Calculate Hebrew day of year
1161 $hebrewDayOfYear = $dayOfYear - $start;
1163 # Difference between year's days
1164 $diff = $nextStart - $start;
1165 # Add 12 (or 13 for leap years) days to ignore the difference between
1166 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1167 # difference is only about the year type
1168 if ( ( $year %
400 == 0 ) ||
( $year %
100 != 0 && $year %
4 == 0 ) ) {
1174 # Check the year pattern, and is leap year
1175 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1176 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1177 # and non-leap years
1178 $yearPattern = $diff %
30;
1179 # Check if leap year
1180 $isLeap = $diff >= 30;
1182 # Calculate day in the month from number of day in the Hebrew year
1183 # Don't check Adar - if the day is not in Adar, we will stop before;
1184 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1185 $hebrewDay = $hebrewDayOfYear;
1188 while ( $hebrewMonth <= 12 ) {
1189 # Calculate days in this month
1190 if ( $isLeap && $hebrewMonth == 6 ) {
1191 # Adar in a leap year
1193 # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1195 if ( $hebrewDay <= $days ) {
1199 # Subtract the days of Adar I
1200 $hebrewDay -= $days;
1203 if ( $hebrewDay <= $days ) {
1209 } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1210 # Cheshvan in a complete year (otherwise as the rule below)
1212 } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1213 # Kislev in an incomplete year (otherwise as the rule below)
1216 # Odd months have 30 days, even have 29
1217 $days = 30 - ( $hebrewMonth - 1 ) %
2;
1219 if ( $hebrewDay <= $days ) {
1220 # In the current month
1223 # Subtract the days of the current month
1224 $hebrewDay -= $days;
1225 # Try in the next month
1230 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1234 * This calculates the Hebrew year start, as days since 1 September.
1235 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1236 * Used for Hebrew date.
1238 private static function hebrewYearStart( $year ) {
1239 $a = intval( ( 12 * ( $year - 1 ) +
17 ) %
19 );
1240 $b = intval( ( $year - 1 ) %
4 );
1241 $m = 32.044093161144 +
1.5542417966212 * $a +
$b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1245 $Mar = intval( $m );
1251 $c = intval( ( $Mar +
3 * ( $year - 1 ) +
5 * $b +
5 ) %
7 );
1252 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1254 } else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1256 } else if ( $c == 2 ||
$c == 4 ||
$c == 6 ) {
1260 $Mar +
= intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1265 * Algorithm to convert Gregorian dates to Thai solar dates,
1266 * Minguo dates or Japanese nengo dates.
1268 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1269 * http://en.wikipedia.org/wiki/Minguo_calendar
1270 * http://en.wikipedia.org/wiki/Japanese_era_name
1272 * @param $ts String: 14-character timestamp
1273 * @param $cName String: calendar name
1274 * @return Array: converted year, month, day
1276 private static function tsToYear( $ts, $cName ) {
1277 $gy = substr( $ts, 0, 4 );
1278 $gm = substr( $ts, 4, 2 );
1279 $gd = substr( $ts, 6, 2 );
1281 if ( !strcmp( $cName, 'thai' ) ) {
1283 # Add 543 years to the Gregorian calendar
1284 # Months and days are identical
1285 $gy_offset = $gy +
543;
1286 } else if ( ( !strcmp( $cName, 'minguo' ) ) ||
!strcmp( $cName, 'juche' ) ) {
1288 # Deduct 1911 years from the Gregorian calendar
1289 # Months and days are identical
1290 $gy_offset = $gy - 1911;
1291 } else if ( !strcmp( $cName, 'tenno' ) ) {
1292 # Nengō dates up to Meiji period
1293 # Deduct years from the Gregorian calendar
1294 # depending on the nengo periods
1295 # Months and days are identical
1296 if ( ( $gy < 1912 ) ||
( ( $gy == 1912 ) && ( $gm < 7 ) ) ||
( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1298 $gy_gannen = $gy - 1868 +
1;
1299 $gy_offset = $gy_gannen;
1300 if ( $gy_gannen == 1 ) {
1303 $gy_offset = '明治' . $gy_offset;
1305 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1306 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1307 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1308 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1309 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1313 $gy_gannen = $gy - 1912 +
1;
1314 $gy_offset = $gy_gannen;
1315 if ( $gy_gannen == 1 ) {
1318 $gy_offset = '大正' . $gy_offset;
1320 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1321 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1322 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1326 $gy_gannen = $gy - 1926 +
1;
1327 $gy_offset = $gy_gannen;
1328 if ( $gy_gannen == 1 ) {
1331 $gy_offset = '昭和' . $gy_offset;
1334 $gy_gannen = $gy - 1989 +
1;
1335 $gy_offset = $gy_gannen;
1336 if ( $gy_gannen == 1 ) {
1339 $gy_offset = '平成' . $gy_offset;
1345 return array( $gy_offset, $gm, $gd );
/**
 * Roman number formatting up to 3000
 *
 * @param $num Mixed: value to format; it is converted with intval()
 * @return Mixed: the Roman numeral string, or the integer itself when it
 *   lies outside the range 1..3000
 */
static function romanNumeral( $num ) {
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM' )
	);

	$value = intval( $num );
	if ( $value <= 0 || $value > 3000 ) {
		return $value;
	}

	$roman = '';
	// Walk the decimal places from thousands down to units; the key is the
	// row of $table, the value is the weight of that place.
	foreach ( array( 3 => 1000, 2 => 100, 1 => 10, 0 => 1 ) as $row => $weight ) {
		if ( $value >= $weight ) {
			$roman .= $table[$row][floor( $value / $weight )];
		}
		$value = $value % $weight;
	}

	return $roman;
}
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * @param $num Mixed: value to format; it is converted with intval()
 * @return Mixed: the formatted string, or the integer itself when it lies
 *   outside the range 1..9999
 */
static function hebrewNumeral( $num ) {
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);
	// Letters that take a different shape at the end of the result
	static $finalForms = array(
		'כ' => 'ך',
		'מ' => 'ם',
		'נ' => 'ן',
		'פ' => 'ף',
		'צ' => 'ץ',
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 9999 ) {
		return $num;
	}

	$letters = '';
	$pow10 = 1000;
	for ( $i = 3; $i >= 0; $i-- ) {
		if ( $num >= $pow10 ) {
			if ( $num == 15 || $num == 16 ) {
				// 15 and 16 are written as 9+6 and 9+7
				$letters .= $table[0][9] . $table[0][$num - 9];
				$num = 0;
			} else {
				$letters .= $table[$i][intval( ( $num / $pow10 ) )];
				if ( $pow10 == 1000 ) {
					// Mark the thousands letter
					$letters .= "'";
				}
			}
		}
		$num = $num % $pow10;
		$pow10 /= 10;
	}

	// Lengths are in bytes; each Hebrew letter above occupies two bytes.
	$byteLen = strlen( $letters );
	if ( $byteLen == 2 ) {
		$str = $letters . "'";
	} else {
		$str = substr( $letters, 0, $byteLen - 2 ) . '"';
		$str .= substr( $letters, $byteLen - 2, 2 );
	}

	$start = substr( $str, 0, strlen( $str ) - 2 );
	$end = substr( $str, strlen( $str ) - 2 );
	if ( isset( $finalForms[$end] ) ) {
		$str = $start . $finalForms[$end];
	}

	return $str;
}
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use, e.g.:
 *
 *   function timeanddate([...], $format = true) {
 *     $datePreference = $this->dateFormat($format);
 *     [...]
 *   }
 *
 * @param $usePrefs Mixed: if true, the user's preference is used
 *                         if false, the site/language default is used
 *                         if int/string, assumed to be a format.
 * @return string the format name; 'default' when the resolved value is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		// An explicit format was handed in
		$datePreference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$datePreference = $wgUser->getDatePreference();
	} else {
		$datePreference = (string)User::getDefaultOption( 'date' );
	}

	return ( $datePreference == '' ) ? 'default' : $datePreference;
}
/**
 * Get a format string for a given type and preference
 *
 * The result is memoised in $this->dateFormatStrings. It is stored under
 * both the requested preference and the preference it resolved to, so
 * that repeated calls with 'default' (or with a preference that has no
 * format of its own) hit the cache instead of repeating the lookup.
 *
 * @param $type May be date, time or both
 * @param $pref The format name as it appears in Messages*.php
 * @return Mixed: the format string as returned by the data cache
 */
function getDateFormatString( $type, $pref ) {
	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$requestedPref = $pref;
	$df = null;
	if ( $pref != 'default' ) {
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}
	if ( is_null( $df ) ) {
		// 'default' was requested, or the preference has no format of its
		// own: use the language's default date format.
		$pref = $this->getDefaultDateFormat();
		$df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}

	$this->dateFormatStrings[$type][$pref] = $df;
	$this->dateFormatStrings[$type][$requestedPref] = $df;

	return $df;
}
/**
 * Format the date part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	// Normalise the input first, as timeanddate() does, so that any
	// timestamp format accepted by wfTimestamp() can be passed in.
	$ts = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$ts = $this->userAdjust( $ts, $timecorrection );
	}
	$df = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
	return $this->sprintfDate( $df, $ts );
}
/**
 * Format the time-of-day part of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: true to use user's date format preference
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	// Normalise the input first, as timeanddate() does, so that any
	// timestamp format accepted by wfTimestamp() can be passed in.
	$ts = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$ts = $this->userAdjust( $ts, $timecorrection );
	}
	$df = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
	return $this->sprintfDate( $df, $ts );
}
/**
 * Format both the date and the time of a timestamp.
 *
 * @param $ts Mixed: the time format which needs to be turned into a
 *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param $adj Bool: whether to adjust the time output according to the
 *             user configured offset ($timecorrection)
 * @param $format Mixed: what format to return, if it's false output the
 *                default one (default true)
 * @param $timecorrection String: the time offset as returned by
 *                        validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	if ( $adj ) {
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $timecorrection );
	}
	$preference = $this->dateFormat( $format );
	$formatString = $this->getDateFormatString( 'both', $preference );
	return $this->sprintfDate( $formatString, $mwTimestamp );
}
# Fetch one entry of this language's 'messages' item from the data cache
function getMessage( $key ) {
	$text = self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
	return $text;
}
# Fetch the whole 'messages' item of this language from the data cache
function getAllMessages() {
	$messages = self::$dataCache->getItem( $this->mCode, 'messages' );
	return $messages;
}
function iconv( $in, $out, $string ) {
	# This is a wrapper for iconv in all languages except esperanto,
	# which does some nasty x-conversions beforehand

	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
	$target = $out . '//IGNORE';

	wfSuppressWarnings();
	$converted = iconv( $in, $target, $string );
	wfRestoreWarnings();

	return $converted;
}
1571 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
# preg callback: upper-case the first character of capture group 1
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
# preg callback: upper-case the whole match with mb_strtoupper()
function ucwordbreaksCallbackMB( $matches ) {
	$matched = $matches[0];
	return mb_strtoupper( $matched );
}
# preg callback: upper-case capture group 1 using the first case map
# returned by getCaseMaps()
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[0] );
}
# preg callback: lower-case capture group 1 using the second case map
# returned by getCaseMaps()
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[1], $caseMaps[1] );
}
# preg callback: upper-case the whole match with mb_strtoupper()
function ucwordsCallbackMB( $matches ) {
	$matched = $matches[0];
	return mb_strtoupper( $matched );
}
# preg callback: upper-case the whole match using the first case map
# returned by getCaseMaps()
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	return strtr( $matches[0], $caseMaps[0] );
}
# Upper-case the first character of $str. The first byte decides which
# path is taken.
function ucfirst( $str ) {
	$o = ord( $str );

	if ( $o >= 128 ) {
		// fall back to more complex logic in case of multibyte strings
		return $this->uc( $str, true );
	}
	if ( $o >= 96 ) {
		return ucfirst( $str );
	}

	// First byte is below 96: returned unchanged
	return $str;
}
# Upper-case $str, or only its first character when $first is true.
# Strings without high bytes use PHP's byte functions; multibyte strings
# use mbstring when available and the wiki case maps otherwise.
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( $first ) {
			return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return mb_strtoupper( $str );
	}

	// No mbstring: convert through ucCallback(), anchored to the start
	// of the string when only the first character is wanted.
	$x = $first ? '^' : '';
	return preg_replace_callback(
		"/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
# Lower-case the first character of $str. The first byte decides which
# path is taken.
function lcfirst( $str ) {
	$o = ord( $str );

	if ( !$o ) {
		return strval( $str );
	}
	if ( $o >= 128 ) {
		return $this->lc( $str, true );
	}
	if ( $o <= 96 ) {
		$str[0] = strtolower( $str[0] );
	}

	// First byte above 96 is returned unchanged
	return $str;
}
# Lower-case $str, or only its first character when $first is true.
# Strings without high bytes use PHP's byte functions; multibyte strings
# use mbstring when available and the wiki case maps otherwise.
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( $first ) {
			return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}
		return mb_strtolower( $str );
	}

	// No mbstring: convert through lcCallback(), anchored to the start
	// of the string when only the first character is wanted.
	$x = $first ? '^' : '';
	return preg_replace_callback(
		"/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
# True when $str contains at least one byte in the range 0x80-0xff
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );
	return (bool)$found;
}
# Lower-case $str and capitalise the first letter of each
# space-separated word
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$str = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback(
		$replaceRegexp,
		array( $this, $callback ),
		$str
	);
}
# capitalize words at word breaks
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$str = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$callback = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback(
		$replaceRegexp,
		array( $this, $callback ),
		$str
	);
}
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 */
function caseFold( $s ) {
	// Folding is done by upper-casing the whole string
	$folded = $this->uc( $s );
	return $folded;
}
# Return $s unchanged when it is pure ASCII or well-formed UTF-8;
# otherwise convert it from the language's fallback 8-bit encoding.
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Check for non-UTF-8 URLs
	$hasHighBytes = preg_match( '/[\x80-\xff]/', $s );
	if ( !$hasHighBytes ) {
		return $s;
	}

	$utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	if ( preg_match( $utf8Pattern, $s ) ) {
		return $s;
	}

	return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
# Read the 'fallback8bitEncoding' item of this language from the data cache
function fallback8bitEncoding() {
	$encoding = self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
	return $encoding;
}
1777 * Most writing systems use whitespace to break up words.
1778 * Some languages such as Chinese don't conventionally do this,
1779 * which requires special handling when breaking up words for
1782 function hasWordBreaks() {
1787 * Some languages such as Chinese require word segmentation,
1788 * Specify such segmentation when overridden in derived class.
1790 * @param $string String
1793 function segmentByWord( $string ) {
/**
 * Some languages have special punctuation need to be normalized.
 * Make such changes here.
 *
 * @param $string String
 * @return String: the result of convertDoubleWidth() on $string
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );
	return $normalized;
}
1809 * convert double-width roman characters to single-width.
1810 * range: ff00-ff5f ~= 0020-007f
1812 protected static function convertDoubleWidth( $string ) {
1813 static $full = null;
1814 static $half = null;
1816 if ( $full === null ) {
1817 $fullWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1818 $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1819 $full = str_split( $fullWidth, 3 );
1820 $half = str_split( $halfWidth );
1823 $string = str_replace( $full, $half, $string );
# Surround every match of $pattern (its first capture group) with single
# spaces, then collapse runs of spaces into one.
protected static function insertSpace( $string, $pattern ) {
	$spaced = preg_replace( $pattern, " $1 ", $string );
	return preg_replace( '/ +/', ' ', $spaced );
}
1833 function convertForSearchResult( $termsArray ) {
1834 # some languages, e.g. Chinese, need to do a conversion
1835 # in order for search results to be displayed correctly
/**
 * Get the first character of a string.
 *
 * For a Hangul syllable the matching leading consonant (jamo) is returned
 * instead of the syllable itself.
 *
 * @param $s string
 * @return string the first character, or '' when $s does not start with
 *   a sequence the pattern below accepts
 */
function firstChar( $s ) {
	// Upper code point bound (exclusive) of each block of syllables that
	// share a leading consonant, mapped to that consonant.
	static $jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);

	$matches = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$matches
	);

	if ( !isset( $matches[1] ) ) {
		return '';
	}

	$char = $matches[1];
	// Hangul syllables are three bytes long; anything else is returned as is
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	// Break down Hangul syllables to grab the first jamo
	$code = utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}
	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}
	return "\xe3\x85\x8e";
}
1897 function initEncoding() {
1898 # Some languages may have an alternate char encoding option
1899 # (Esperanto X-coding, Japanese furigana conversion, etc)
1900 # If this language is used as the primary content language,
1901 # an override to the defaults can be set here on startup.
function recodeForEdit( $s ) {
	# For some languages we'll want to explicitly specify
	# which characters make it into the edit box raw
	# or are converted in some way or another.
	# Note that if wgOutputEncoding is different from
	# wgInputEncoding, this text will be further converted
	# to wgOutputEncoding.
	global $wgEditEncoding;

	$editsInUtf8 = ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if ( !$editsInUtf8 ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}
	return $s;
}
function recodeInput( $s ) {
	# Take the previous into account.
	global $wgEditEncoding;

	# An empty $wgEditEncoding means UTF-8
	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

	if ( $enc != 'UTF-8' ) {
		return $this->iconv( $enc, 'UTF-8', $s );
	}
	return $s;
}
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only.
 *
 * @param $s string
 * @return string
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal::cleanUp( $s );
	if ( $wgAllUnicodeFixes ) {
		// Apply the Arabic fixes first, then the Malayalam ones
		foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
			$s = $this->transformUsingPairFile( $pairFile, $s );
		}
	}

	return $s;
}
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The data is cached in process memory. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param $file string name of the precompiled data file
 * @param $string string text to transform
 * @return string
 * @throws MWException when the data file cannot be loaded
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$pairs = wfGetPrecompiledData( $file );
	if ( $pairs === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}

	// Keep the replacer for later calls with the same file
	$replacer = new ReplacementArray( $pairs );
	$this->transformData[$file] = $replacer;

	return $replacer->replace( $string );
}
1972 * For right-to-left language support
1977 return self
::$dataCache->getItem( $this->mCode
, 'rtl' );
1981 * Return the correct HTML 'dir' attribute value for this language.
1985 return $this->isRTL() ?
'rtl' : 'ltr';
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return String
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}
	return 'left';
}
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return String
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}
	return 'right';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction
 *
 * @return string U+200F for right-to-left languages, U+200E otherwise,
 *   both UTF-8 encoded
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
# Read the 'capitalizeAllNouns' item of this language from the data cache
function capitalizeAllNouns() {
	$setting = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
	return $setting;
}
/**
 * An arrow, depending on the language direction
 *
 * @return string a left arrow for right-to-left languages, a right
 *   arrow otherwise
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
 *
 * @return Mixed: the 'linkPrefixExtension' item of this language
 */
function linkPrefixExtension() {
	$setting = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
	return $setting;
}
# Read the 'magicWords' item of this language from the data cache
function getMagicWords() {
	$words = self::$dataCache->getItem( $this->mCode, 'magicWords' );
	return $words;
}
# Fill a MagicWord object with data from here
#
# Entries added through the LanguageGetMagic hook take precedence over the
# language's own magic words. When no array entry is found for $mw->mId,
# a message is written with error_log() and $mw is left untouched.
function getMagic( $mw ) {
	// Run the extension hook only once per object
	if ( !$this->mMagicHookDone ) {
		$this->mMagicHookDone = true;
		wfProfileIn( 'LanguageGetMagic' );
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
		wfProfileOut( 'LanguageGetMagic' );
	}

	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$magicWords = $this->getMagicWords();
		$rawEntry = isset( $magicWords[$mw->mId] ) ? $magicWords[$mw->mId] : false;
	}

	if ( is_array( $rawEntry ) ) {
		// First element is the case-sensitivity flag, the rest are synonyms
		$mw->mCaseSensitive = $rawEntry[0];
		$mw->mSynonyms = array_slice( $rawEntry, 1 );
	} else {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
	}
}
/**
 * Add magic words to the extension array
 *
 * @param $newWords array of magic word arrays, keyed by language code
 */
function addMagicWordsByLang( $newWords ) {
	// Collect this language's fallback chain, stopping on a repeated code
	$chain = array();
	$current = $this->getCode();
	while ( $current && !in_array( $current, $chain ) ) {
		$chain[] = $current;
		$current = self::getFallbackFor( $current );
	}
	if ( !in_array( 'en', $chain ) ) {
		$chain[] = 'en';
	}

	// Merge from the last fallback towards this language, so that words
	// of languages earlier in the chain win on conflicting keys.
	foreach ( array_reverse( $chain ) as $langCode ) {
		if ( isset( $newWords[$langCode] ) ) {
			$this->mMagicExtensions = $newWords[$langCode] + $this->mMagicExtensions;
		}
	}
}
/**
 * Get special page names, as an associative array
 *   case folded alias => real name
 */
function getSpecialPageAliases() {
	// Cache aliases because it may be slow to load them
	if ( $this->mExtendedSpecialPageAliases !== null ) {
		return $this->mExtendedSpecialPageAliases;
	}

	// Initialise array from the data cache, then let hooks extend it
	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	wfRunHooks(
		'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() )
	);

	return $this->mExtendedSpecialPageAliases;
}
/**
 * Italic is unsuitable for some languages
 *
 * @param $text String: the text to be emphasized.
 * @return string $text wrapped in an <em> element
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
/**
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages, some want other digits and others such as Icelandic just
 * want to use commas instead of dots, and dots instead of commas like
 * "293.291,235".
 *
 * An example of this function being called:
 *   wfMsg( 'message', $wgLang->formatNum( $num ) )
 *
 * See LanguageGu.php for the Gujarati implementation and
 * $separatorTransformTable on MessageIs.php for
 * the , => . and . => , implementation.
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *       separator thing.
 * @param $number Mixed: the string to be formatted, should be an integer
 *        or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return string
 */
function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
# Undo formatNum(): map localised digits and separators back with the
# flipped transform tables, then strip the remaining commas.
function parseFormattedNumber( $number ) {
	$digits = $this->digitTransformTable();
	if ( $digits ) {
		$number = strtr( $number, array_flip( $digits ) );
	}

	$separators = $this->separatorTransformTable();
	if ( $separators ) {
		$number = strtr( $number, array_flip( $separators ) );
	}

	return strtr( $number, array( ',' => '' ) );
}
/**
 * Adds commas to a given number
 *
 * @param $_ mixed
 * @return string
 */
function commafy( $_ ) {
	// Work on the reversed string so that digits are grouped from the
	// right; the second lookahead leaves digits after a '.' ungrouped.
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
# Read the 'digitTransformTable' item of this language from the data cache
function digitTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
	return $table;
}
# Read the 'separatorTransformTable' item of this language from the data cache
function separatorTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
	return $table;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with an "and".
 *
 * @param $l Array
 * @return string
 */
function listToText( $l ) {
	$last = count( $l ) - 1;

	if ( $last == 1 ) {
		// Exactly two items: "a and b"
		return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
	}

	// Build the text from the last item backwards
	$text = '';
	$pos = $last;
	while ( $pos >= 0 ) {
		if ( $pos == $last ) {
			$text = $l[$pos];
		} elseif ( $pos == $last - 1 ) {
			$text = $l[$pos] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $text;
		} else {
			$text = $l[$pos] . $this->getMessageFromDB( 'comma-separator' ) . $text;
		}
		$pos--;
	}

	return $text;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * @param $list array of strings to put in a comma list
 * @return string
 */
function commaList( $list ) {
	$separator = wfMsgExt(
		'comma-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	return implode( $separator, $list );
}
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 * @param $list array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( $list ) {
	$separator = wfMsgExt(
		'semicolon-separator',
		array( 'parsemag', 'escapenoentities', 'language' => $this )
	);
	return implode( $separator, $list );
}
/**
 * Same as commaList, but separate it with the pipe instead.
 * @param $list array of strings to put in a pipe list
 * @return string
 */
function pipeList( $list ) {
	$separator = wfMsgExt(
		'pipe-separator',
		array( 'escapenoentities', 'language' => $this )
	);
	return implode( $separator, $list );
}
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns in the database;
 * multi-byte character sets mean we need to ensure that only whole characters
 * are included, otherwise broken characters can be passed to the user
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param $string String to truncate
 * @param $length Int: maximum length (excluding ellipses)
 * @param $ellipsis String to append to the truncated text
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}

	# Check if there is no need to truncate
	if ( $length == 0 ) {
		return $ellipsis;
	}
	if ( strlen( $string ) <= abs( $length ) ) {
		return $string;
	}

	if ( $length > 0 ) {
		// xyz...
		$head = substr( $string, 0, $length );
		$truncated = $this->removeBadCharLast( $head ) . $ellipsis;
	} else {
		// ...xyz
		$tail = substr( $string, $length );
		$truncated = $ellipsis . $this->removeBadCharFirst( $tail );
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
	return ( strlen( $truncated ) < strlen( $string ) ) ? $truncated : $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharLast( $string ) {
	// Nothing to inspect; also avoids reading offset -1 of an empty string
	if ( $string == '' ) {
		return $string;
	}

	$char = ord( $string[strlen( $string ) - 1] );
	$m = array();
	if ( $char >= 0xc0 ) {
		# We got the first byte only of a multibyte char; remove it.
		$string = substr( $string, 0, -1 );
	} elseif ( $char >= 0x80 &&
		// The /s modifier lets (.*) span newlines, so that a chopped
		// character is also found in multi-line text.
		preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
			'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
	) {
		# We chopped in the middle of a character; remove it
		$string = $m[1];
	}

	return $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param $string String
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	// Nothing to inspect; also avoids reading offset 0 of an empty string
	if ( $string == '' ) {
		return $string;
	}

	$char = ord( $string[0] );
	if ( $char >= 0x80 && $char < 0xc0 ) {
		# We chopped in the middle of a character; remove the whole thing
		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
	}

	return $string;
}
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML)
 *
 * Note: tries to fix broken HTML with MWTidy
 *
 * @param string $text String to truncate
 * @param int $length (zero/positive) Maximum length (excluding ellipses)
 * @param string $ellipsis String to append to the truncated text
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
	}
	# Check if there is no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML
	}
	$text = MWTidy::tidy( $text ); // fix tags
	$displayLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = $ch = '';
	$openTags = array();
	$textLen = strlen( $text );
	for ( $pos = 0; $pos < $textLen; ++$pos ) {
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (checking for backslash)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$displayLen++; // entity is one displayed char
				}
			} else {
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$displayLen++; // this char is displayed
					// Add on the other display text after this...
					$skipped = $this->truncate_skip(
						$ret, $text, "<>&", $pos + 1, $length - $displayLen );
					$displayLen += $skipped;
					$pos += $skipped;
				}
			}
		}
		# Consider truncation once the display length has reached the maximum.
		# Double-check that we're not in the middle of a bracket/entity...
		if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless
				# the ellipsis actually makes the string longer.
				$pOpenTags = $openTags; // save state
				$pRet = $ret; // save state
			} elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
				# Ellipsis won't make string longer/equal, the truncation point was OK.
				$openTags = $pOpenTags; // reload state
				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
	}
	if ( $displayLen == 0 ) {
		return ''; // no text shown, nothing to format
	}
	// For bad HTML: account for a tag still being built when the text ended.
	// The arguments follow truncate_endBracket()'s parameter order
	// ( &$tag, $tagType, $lastCh, &$openTags ).
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
// truncateHtml() helper function
// like strcspn() but adds the skipped chars to $ret
// Returns the number of bytes skipped; 0 when $start is at or past the
// end of $text.
private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
	if ( $start >= strlen( $text ) ) {
		return 0;
	}

	$skipCount = strcspn( $text, $search, $start, $len );
	$ret .= substr( $text, $start, $skipCount );

	return $skipCount;
}
// truncateHtml() helper function
// (a) push or pop $tag from $openTags as needed
// (b) clear $tag value
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return; // no tag name collected, nothing to do
	}

	if ( $tagType == 0 && $lastCh != '/' ) {
		// tag opened (didn't close itself)
		$openTags[] = $tag;
	} elseif ( $tagType == 1 ) {
		$top = count( $openTags ) - 1;
		if ( $openTags && $tag == $openTags[$top] ) {
			// tag closed
			array_pop( $openTags );
		}
	}

	$tag = '';
}
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * @param $word string
 * @param $case string
 * @return string the form configured in $wgGrammarForms for this
 *   language, case and word, or $word itself when there is none
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$code = $this->getCode();
	if ( !isset( $wgGrammarForms[$code][$case][$word] ) ) {
		return $word;
	}
	return $wgGrammarForms[$this->getCode()][$case][$word];
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If second or third parameter are not specified, masculine is used.
 * These details may be overridden per language.
 *
 * @param $gender string 'male', 'female', or anything else for neutral
 * @param $forms array of forms in the order masculine, feminine, neutral
 * @return string
 */
function gender( $gender, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Make sure there are at least a masculine and a feminine form
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		return $forms[0];
	} elseif ( $gender === 'female' ) {
		return $forms[1];
	} elseif ( isset( $forms[2] ) ) {
		return $forms[2];
	}
	// No neutral form given: use the masculine one
	return $forms[0];
}
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 form of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * @param $count Integer: non-localized number
 * @param $forms Array: different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $count == 1 ) {
		return $forms[0];
	}
	return $forms[1];
}
/**
 * Pads the array of forms given to convertPlural to the requested
 * amount of forms by copying the last one.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	for ( $have = count( $forms ); $have < $count; $have++ ) {
		// Repeat the current last form
		$forms[] = $forms[$have - 1];
	}
	return $forms;
}
/**
 * For translating of expiry times.
 *
 * Searches the comma-separated "label:value" pairs of the 'ipboptions'
 * message for a value equal to $str.
 *
 * @param $str String: the validated block time in English
 * @return string HTML-escaped, trimmed label of the matching pair, or $str
 *  itself when the message is '-' or no pair matches
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );

	if ( $scBlockExpiryOptions == '-' ) {
		// A message of '-' carries no options to look the value up in.
		return $str;
	}

	foreach ( explode( ',', $scBlockExpiryOptions ) as $option ) {
		if ( strpos( $option, ':' ) === false ) {
			// Not a "label:value" pair; nothing to compare against.
			continue;
		}
		list( $show, $value ) = explode( ':', $option );
		if ( strcmp( $str, $value ) == 0 ) {
			return htmlspecialchars( trim( $show ) );
		}
	}

	// No pair matched: fall back to the untranslated input.
	return $str;
}
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation performs no segmentation.
 *
 * @param $text String
 * @return String the text, unchanged
 */
function segmentForDiff( $text ) {
	return $text;
}
/**
 * Counterpart of segmentForDiff(): unsegment the text to show the result.
 * This base implementation has nothing to undo.
 *
 * @param $text String
 * @return String the text, unchanged
 */
function unsegmentForDiff( $text ) {
	return $text;
}
/**
 * Convert text to all supported variants.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String: text to convert
 * @return mixed the converter's autoConvertToAllVariants() result
 */
function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
/**
 * Convert text to different variants of a language.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String: text to convert
 * @return mixed the converter's convert() result
 */
function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
/**
 * Convert a Title object to a string in the preferred variant.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $title the Title object to convert
 * @return mixed the converter's convertTitle() result
 */
function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
/**
 * Check if this is a language with variants.
 *
 * @return bool true when getVariants() lists more than one entry
 */
function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String: the math text to protect
 * @return mixed the converter's armourMath() result
 */
function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * $isTitle is passed on to convert() as a second argument. convert() as
 * declared in this class only takes $text, so the flag has no effect here
 * unless an overriding convert() accepts it.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title
 * @return string converted text run through htmlspecialchars()
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
/**
 * Convert a category key through the language converter.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $key the category key to convert
 * @return mixed the converter's convertCategoryKey() result
 */
function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language;
 * see sample implementation in LanguageZh.php.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
/**
 * Get the preferred variant as reported by the language converter.
 *
 * Both flags are forwarded unchanged to the converter held in
 * $this->mConverter; how they are interpreted is up to that converter.
 *
 * @param $fromUser Boolean: forwarded to the converter (default true)
 * @param $fromHeader Boolean: forwarded to the converter (default false)
 * @return mixed the converter's getPreferredVariant() result
 */
function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant( $fromUser, $fromHeader );
}
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it. See e.g. LanguageZh.php
 *
 * Both $link and $nt are passed by reference on to the converter held in
 * $this->mConverter, which may rewrite them.
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *  we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String: the link text to convert
 * @return mixed the converter's convertLinkToAllVariants() result
 */
function convertLinkToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example, the preferred language variant.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @return mixed the converter's getExtraHashOptions() result
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the article.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @return mixed the converter's getParsedTitle() result
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse forwarded unchanged to the converter (default false)
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse = false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * Read from the shared data cache under the 'linkTrail' item for this
 * language's code.
 *
 * @return mixed the cached 'linkTrail' item
 */
function linkTrail() {
	$trail = self::$dataCache->getItem( $this->mCode, 'linkTrail' );
	return $trail;
}
/**
 * Get the language object itself.
 *
 * @return Language this object
 */
function getLangObj() {
	return $this;
}
/**
 * Get the RFC 3066 code for this language object.
 *
 * @return string the value of $this->mCode
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
/**
 * Set the code stored for this language object.
 *
 * Only $this->mCode is overwritten; nothing else is touched here.
 *
 * @param $code string: the new language code
 */
function setCode( $code ) {
	$this->mCode = $code;
}
/**
 * Get the name of a file for a certain language code.
 *
 * The code is mangled by upper-casing its first character and replacing
 * every '-' with '_'.
 *
 * $prefix used to declare a default of 'Language', but because the required
 * $code follows it, that default could never take effect, and PHP 8 reports
 * the declaration as deprecated. Callers already have to pass it explicitly.
 *
 * @param $prefix string Prepend this to the filename
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 */
static function getFileName( $prefix, $code, $suffix = '.php' ) {
	$mangledCode = str_replace( '-', '_', ucfirst( $code ) );
	return $prefix . $mangledCode . $suffix;
}
/**
 * Get the language code from a file name. Inverse of getFileName().
 *
 * The code part must be one capital letter followed by lower-case letters
 * or underscores, sitting directly between $prefix and $suffix. The match
 * is not anchored, so the pattern may occur anywhere inside $filename.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code, or false if $prefix or $suffix isn't found
 */
static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$m = null;
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';
	preg_match( $pattern, $filename, $m );
	if ( !count( $m ) ) {
		return false;
	}
	// Undo the mangling: lower-case everything, underscores back to dashes.
	return str_replace( '_', '-', strtolower( $m[1] ) );
}
/**
 * Get the path of the messages file for a language code.
 *
 * @param $code string Language code
 * @return string "$IP/languages/messages/Messages" . $mangledCode . '.php'
 */
static function getMessagesFileName( $code ) {
	// $IP is read from the global scope.
	global $IP;
	return self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
}
/**
 * Get the path of the language class file for a language code.
 *
 * @param $code string Language code
 * @return string "$IP/languages/classes/Language" . $mangledCode . '.php'
 */
static function getClassFileName( $code ) {
	// $IP is read from the global scope.
	global $IP;
	return self::getFileName( "$IP/languages/classes/Language", $code, '.php' );
}
/**
 * Get the fallback for a given language.
 *
 * @param $code string Language code
 * @return mixed false for 'en' (answered without a cache lookup),
 *  otherwise the 'fallback' item from the localisation cache
 */
static function getFallbackFor( $code ) {
	if ( $code === 'en' ) {
		// Shortcut: no need to consult the localisation cache for 'en'.
		return false;
	}
	return self::getLocalisationCache()->getItem( $code, 'fallback' );
}
/**
 * Get all messages for a given language.
 * WARNING: this may take a long time.
 *
 * @param $code string Language code
 * @return mixed the 'messages' item from the localisation cache
 */
static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
/**
 * Get a message for a given language.
 *
 * @param $key string Message key
 * @param $code string Language code
 * @return mixed the $key sub-item of 'messages' from the localisation cache
 */
static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * '$1' is replaced with $wgMetaNamespace, {{grammar:case|word}} constructs
 * are resolved through convertGrammar(), and spaces become underscores.
 * A name without '$1' is returned exactly as given.
 *
 * @param $talk string: namespace name, possibly containing '$1'
 * @return string
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback holds $this by value: PHP 7.1+ rejects &$this, and a
	# method callback does not need a reference.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 *
 * @param $m array: regex match; $m[1] is the case, $m[2] the word
 * @return string result of convertGrammar() on the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
	$case = trim( $m[1] );
	$word = trim( $m[2] );
	return $this->convertGrammar( $word, $case );
}
/**
 * Get the upper/lower case mapping tables loaded from 'Utf8Case.ser'.
 *
 * The tables are kept in function-static variables, so the precompiled
 * data file is only read on the first call.
 *
 * @return array array( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException when the precompiled data cannot be loaded
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;
	if ( isset( $wikiUpperChars ) ) {
		return array( $wikiUpperChars, $wikiLowerChars );
	}

	wfProfileIn( __METHOD__ );
	$arr = wfGetPrecompiledData( 'Utf8Case.ser' );
	if ( $arr === false ) {
		// Close the profiling section before bailing out.
		wfProfileOut( __METHOD__ );
		throw new MWException(
			"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
	}
	// NOTE(review): assumes the precompiled array is keyed by the names of
	// the two static variables -- confirm against serialized/Utf8Case.ser.
	$wikiUpperChars = $arr['wikiUpperChars'];
	$wikiLowerChars = $arr['wikiLowerChars'];
	wfProfileOut( __METHOD__ );
	return array( $wikiUpperChars, $wikiLowerChars );
}
/**
 * Format a number of seconds as a short period string.
 *
 * Under 10 seconds one decimal place is shown, under a minute whole
 * seconds, under an hour minutes and seconds, otherwise hours, minutes and
 * seconds. Numbers go through formatNum(); unit suffixes come from the
 * 'seconds-abbrev', 'minutes-abbrev' and 'hours-abbrev' messages.
 *
 * @param $seconds number of seconds
 * @return string
 */
function formatTimePeriod( $seconds ) {
	if ( $seconds < 10 ) {
		return $this->formatNum( sprintf( "%.1f", $seconds ) ) .
			$this->getMessageFromDB( 'seconds-abbrev' );
	}
	if ( $seconds < 60 ) {
		return $this->formatNum( round( $seconds ) ) .
			$this->getMessageFromDB( 'seconds-abbrev' );
	}

	if ( $seconds < 3600 ) {
		// Below one hour there is no hours component at all.
		$hours = null;
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
	} else {
		$hours = floor( $seconds / 3600 );
		$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
		$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
	}

	// Rounding can push the seconds up to a full minute; carry it over.
	if ( $secondsPart == 60 ) {
		$secondsPart = 0;
		$minutes++;
	}
	// Minutes are only carried into hours when hours are displayed.
	if ( $hours !== null && $minutes == 60 ) {
		$minutes = 0;
		$hours++;
	}

	$text = $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
		$this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
	if ( $hours !== null ) {
		$text = $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
			$text;
	}
	return $text;
}
/**
 * Format a bitrate for output, using bps, kbps, Mbps or Gbps according
 * to the magnitude in question.
 *
 * The mantissa keeps one decimal place below 10 and none otherwise.
 * Values below 1 bps stay in 'bps' and values of 10^12 bps or more stay
 * in 'Gbps', so the unit lookup never leaves the $units table.
 *
 * @param $bps number: bits per second
 * @return string formatted number directly followed by the unit
 */
function formatBitrate( $bps ) {
	$units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
	if ( $bps <= 0 ) {
		// log10() is undefined for zero and negative values.
		return $this->formatNum( $bps ) . $units[0];
	}

	// One unit step per three decimal orders of magnitude, clamped to the
	// units that actually exist.
	$unitIndex = (int)floor( log10( $bps ) / 3 );
	$unitIndex = max( 0, min( $unitIndex, count( $units ) - 1 ) );

	$mantissa = $bps / pow( 1000, $unitIndex );
	if ( $mantissa < 10 ) {
		$mantissa = round( $mantissa, 1 );
	} else {
		$mantissa = round( $mantissa );
	}
	return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question.
 *
 * The size is divided by 1024 while it is strictly greater than 1024 and
 * a larger unit exists. The text comes from the 'size-bytes',
 * 'size-kilobytes', 'size-megabytes' or 'size-gigabytes' message, with
 * '$1' replaced by the formatted number.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$messages = array( 'size-bytes', 'size-kilobytes', 'size-megabytes', 'size-gigabytes' );
	$largest = count( $messages ) - 1;

	$unit = 0;
	while ( $size > 1024 && $unit < $largest ) {
		$size = $size / 1024;
		$unit++;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter.
	$round = ( $unit >= 2 ) ? 2 : 0;

	$size = round( $size, $round );
	$text = $this->getMessageFromDB( $messages[$unit] );
	return str_replace( '$1', $this->formatNum( $size ), $text );
}
/**
 * Get the conversion rule title, if any.
 *
 * Delegates to the converter object held in $this->mConverter.
 *
 * @return mixed the converter's getConvRuleTitle() result
 */
function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
/**
 * Given a string, convert it to a (hopefully short) key that can be used
 * for efficient sorting. A binary sort according to the sortkeys
 * corresponds to a logical sort of the corresponding strings. Current
 * code expects that a null character should sort before all others, but
 * has no other particular expectations.
 *
 * @param string $string UTF-8 string
 * @return string Binary sortkey
 */
public function convertToSortkey( $string ) {
	# Fake function for now: the key is just the strtoupper() of the input
	$sortkey = strtoupper( $string );
	return $sortkey;
}
/**
 * Does it make sense for lists to be split up into sections based on their
 * first letter? Logogram-based scripts probably want to return false.
 *
 * TODO: Use this in CategoryPage.php.
 *
 * @return boolean true in this base implementation
 */
public function usesFirstLettersInLists() {
	return true;
}
/**
 * Given a string, return the logical "first letter" to be used for
 * grouping on category pages and so on. This has to be coordinated
 * carefully with convertToSortkey(), or else the sorted list might jump
 * back and forth between the same "initial letters" or other pathological
 * behavior. For instance, if you just return the first character, but "a"
 * sorts the same as "A" based on convertToSortkey(), then you might get a
 * list like
 *
 * == A ==
 * * [[Aardvark]]
 *
 * == a ==
 * * [[antelope]]
 *
 * == A ==
 * * [[Ape]]
 *
 * etc., assuming for the sake of argument that $wgCapitalLinks is false.
 * Obviously, this is ignored if usesFirstLettersInLists() is false.
 *
 * A single leading null character is skipped before the first character
 * is taken. An empty string is handled without reading offset 0.
 *
 * @param string $string UTF-8 string
 * @return string UTF-8 string corresponding to the first letter of input
 */
public function firstLetterForLists( $string ) {
	// isset() guards the offset read: $string[0] on an empty string raises
	// an "uninitialized string offset" notice/warning.
	if ( isset( $string[0] ) && $string[0] == "\0" ) {
		$string = substr( $string, 1 );
	}
	return strtoupper( $this->firstChar( $string ) );
}