languages/Language.php

   1 <?php
   2 /**
   3  * @defgroup Language Language
   4  *
   5  * @file
   6  * @ingroup Language
   7  */
   8
   9 if ( !defined( 'MEDIAWIKI' ) ) {
  10         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
  11         exit( 1 );
  12 }
  13
  14 # Read language names
  15 global $wgLanguageNames;
  16 require_once( dirname( __FILE__ ) . '/Names.php' );
  17
  18 global $wgInputEncoding, $wgOutputEncoding;
  19
  20 /**
  21  * These are always UTF-8, they exist only for backwards compatibility
  22  */
  23 $wgInputEncoding    = 'UTF-8';
  24 $wgOutputEncoding       = 'UTF-8';
  25
  26 if ( function_exists( 'mb_strtoupper' ) ) {
  27         mb_internal_encoding( 'UTF-8' );
  28 }
  29
  30 /**
  31  * a fake language converter
  32  *
  33  * @ingroup Language
  34  */
  35 class FakeConverter {
  36         var $mLang;
  37         function FakeConverter( $langobj ) { $this->mLang = $langobj; }
  38         function autoConvertToAllVariants( $text ) { return $text; }
  39         function convert( $t ) { return $t; }
  40         function convertTitle( $t ) { return $t->getPrefixedText(); }
  41         function getVariants() { return array( $this->mLang->getCode() ); }
  42         function getPreferredVariant() { return $this->mLang->getCode(); }
  43         function getConvRuleTitle() { return false; }
  44         function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) { }
  45         function getExtraHashOptions() { return ''; }
  46         function getParsedTitle() { return ''; }
  47         function markNoConversion( $text, $noParse = false ) { return $text; }
  48         function convertCategoryKey( $key ) { return $key; }
  49         function convertLinkToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
  50         function armourMath( $text ) { return $text; }
  51 }
  52
  53 /**
  54  * Internationalisation code
  55  * @ingroup Language
  56  */
  57 class Language {
  58         var $mConverter, $mVariants, $mCode, $mLoaded = false;
  59         var $mMagicExtensions = array(), $mMagicHookDone = false;
  60
  61         var $mNamespaceIds, $namespaceNames, $namespaceAliases;
  62         var $dateFormatStrings = array();
  63         var $mExtendedSpecialPageAliases;
  64
  65         /**
  66          * ReplacementArray object caches
  67          */
  68         var $transformData = array();
  69
  70         static public $dataCache;
  71         static public $mLangObjCache = array();
  72
  73         static public $mWeekdayMsgs = array(
  74                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
  75                 'friday', 'saturday'
  76         );
  77
  78         static public $mWeekdayAbbrevMsgs = array(
  79                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
  80         );
  81
  82         static public $mMonthMsgs = array(
  83                 'january', 'february', 'march', 'april', 'may_long', 'june',
  84                 'july', 'august', 'september', 'october', 'november',
  85                 'december'
  86         );
  87         static public $mMonthGenMsgs = array(
  88                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
  89                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
  90                 'december-gen'
  91         );
  92         static public $mMonthAbbrevMsgs = array(
  93                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
  94                 'sep', 'oct', 'nov', 'dec'
  95         );
  96
  97         static public $mIranianCalendarMonthMsgs = array(
  98                 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
  99                 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
 100                 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
 101                 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
 102         );
 103
 104         static public $mHebrewCalendarMonthMsgs = array(
 105                 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
 106                 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
 107                 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
 108                 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
 109                 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
 110         );
 111
 112         static public $mHebrewCalendarMonthGenMsgs = array(
 113                 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
 114                 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
 115                 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
 116                 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
 117                 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
 118         );
 119
 120         static public $mHijriCalendarMonthMsgs = array(
 121                 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
 122                 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
 123                 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
 124                 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
 125         );
 126
 127         /**
 128          * Get a cached language object for a given language code
 129          */
 130         static function factory( $code ) {
 131                 if ( !isset( self::$mLangObjCache[$code] ) ) {
 132                         if ( count( self::$mLangObjCache ) > 10 ) {
 133                                 // Don't keep a billion objects around, that's stupid.
 134                                 self::$mLangObjCache = array();
 135                         }
 136                         self::$mLangObjCache[$code] = self::newFromCode( $code );
 137                 }
 138                 return self::$mLangObjCache[$code];
 139         }
 140
 141         /**
 142          * Create a language object for a given language code
 143          */
 144         protected static function newFromCode( $code ) {
 145                 global $IP;
 146                 static $recursionLevel = 0;
 147                 if ( $code == 'en' ) {
 148                         $class = 'Language';
 149                 } else {
 150                         $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 151                         // Preload base classes to work around APC/PHP5 bug
 152                         if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
 153                                 include_once( "$IP/languages/classes/$class.deps.php" );
 154                         }
 155                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 156                                 include_once( "$IP/languages/classes/$class.php" );
 157                         }
 158                 }
 159
 160                 if ( $recursionLevel > 5 ) {
 161                         throw new MWException( "Language fallback loop detected when creating class $class\n" );
 162                 }
 163
 164                 if ( !class_exists( $class ) ) {
 165                         $fallback = Language::getFallbackFor( $code );
 166                         ++$recursionLevel;
 167                         $lang = Language::newFromCode( $fallback );
 168                         --$recursionLevel;
 169                         $lang->setCode( $code );
 170                 } else {
 171                         $lang = new $class;
 172                 }
 173                 return $lang;
 174         }
 175
 176         /**
 177          * Get the LocalisationCache instance
 178          */
 179         public static function getLocalisationCache() {
 180                 if ( is_null( self::$dataCache ) ) {
 181                         global $wgLocalisationCacheConf;
 182                         $class = $wgLocalisationCacheConf['class'];
 183                         self::$dataCache = new $class( $wgLocalisationCacheConf );
 184                 }
 185                 return self::$dataCache;
 186         }
 187
 188         function __construct() {
 189                 $this->mConverter = new FakeConverter( $this );
 190                 // Set the code to the name of the descendant
 191                 if ( get_class( $this ) == 'Language' ) {
 192                         $this->mCode = 'en';
 193                 } else {
 194                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 195                 }
 196                 self::getLocalisationCache();
 197         }
 198
 199         /**
 200          * Reduce memory usage
 201          */
 202         function __destruct() {
 203                 foreach ( $this as $name => $value ) {
 204                         unset( $this->$name );
 205                 }
 206         }
 207
 208         /**
 209          * Hook which will be called if this is the content language.
 210          * Descendants can use this to register hook functions or modify globals
 211          */
 212         function initContLang() { }
 213
 214         /**
 215          * @deprecated Use User::getDefaultOptions()
 216          * @return array
 217          */
 218         function getDefaultUserOptions() {
 219                 wfDeprecated( __METHOD__ );
 220                 return User::getDefaultOptions();
 221         }
 222
 223         function getFallbackLanguageCode() {
 224                 if ( $this->mCode === 'en' ) {
 225                         return false;
 226                 } else {
 227                         return self::$dataCache->getItem( $this->mCode, 'fallback' );
 228                 }
 229         }
 230
 231         /**
 232          * Exports $wgBookstoreListEn
 233          * @return array
 234          */
 235         function getBookstoreList() {
 236                 return self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
 237         }
 238
 239         /**
 240          * @return array
 241          */
 242         function getNamespaces() {
 243                 if ( is_null( $this->namespaceNames ) ) {
 244                         global $wgMetaNamespace, $wgMetaNamespaceTalk;
 245
 246                         $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
 247                         $validNamespaces = MWNamespace::getCanonicalNamespaces();
 248
 249                         $this->namespaceNames = $validNamespaces + $this->namespaceNames;
 250
 251                         $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
 252                         if ( $wgMetaNamespaceTalk ) {
 253                                 $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
 254                         } else {
 255                                 $talk = $this->namespaceNames[NS_PROJECT_TALK];
 256                                 $this->namespaceNames[NS_PROJECT_TALK] =
 257                                         $this->fixVariableInNamespace( $talk );
 258                         }
 259
 260                         # Sometimes a language will be localised but not actually exist on this wiki.
 261                         foreach( $this->namespaceNames as $key => $text ) {
 262                                 if ( !isset( $validNamespaces[$key] ) ) {
 263                                         unset( $this->namespaceNames[$key] );
 264                                 }
 265                         }
 266
 267                         # The above mixing may leave namespaces out of canonical order.
 268                         # Re-order by namespace ID number...
 269                         ksort( $this->namespaceNames );
 270                 }
 271                 return $this->namespaceNames;
 272         }
 273
 274         /**
 275          * A convenience function that returns the same thing as
 276          * getNamespaces() except with the array values changed to ' '
 277          * where it found '_', useful for producing output to be displayed
 278          * e.g. in <select> forms.
 279          *
 280          * @return array
 281          */
 282         function getFormattedNamespaces() {
 283                 $ns = $this->getNamespaces();
 284                 foreach ( $ns as $k => $v ) {
 285                         $ns[$k] = strtr( $v, '_', ' ' );
 286                 }
 287                 return $ns;
 288         }
 289
 290         /**
 291          * Get a namespace value by key
 292          * <code>
 293          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 294          * echo $mw_ns; // prints 'MediaWiki'
 295          * </code>
 296          *
 297          * @param $index Int: the array key of the namespace to return
 298          * @return mixed, string if the namespace value exists, otherwise false
 299          */
 300         function getNsText( $index ) {
 301                 $ns = $this->getNamespaces();
 302                 return isset( $ns[$index] ) ? $ns[$index] : false;
 303         }
 304
 305         /**
 306          * A convenience function that returns the same thing as
 307          * getNsText() except with '_' changed to ' ', useful for
 308          * producing output.
 309          *
 310          * @return string
 311          */
 312         function getFormattedNsText( $index ) {
 313                 $ns = $this->getNsText( $index );
 314                 return strtr( $ns, '_', ' ' );
 315         }
 316
 317         /**
 318          * Get a namespace key by value, case insensitive.
 319          * Only matches namespace names for the current language, not the
 320          * canonical ones defined in Namespace.php.
 321          *
 322          * @param $text String
 323          * @return mixed An integer if $text is a valid value otherwise false
 324          */
 325         function getLocalNsIndex( $text ) {
 326                 $lctext = $this->lc( $text );
 327                 $ids = $this->getNamespaceIds();
 328                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 329         }
 330
 331         function getNamespaceAliases() {
 332                 if ( is_null( $this->namespaceAliases ) ) {
 333                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
 334                         if ( !$aliases ) {
 335                                 $aliases = array();
 336                         } else {
 337                                 foreach ( $aliases as $name => $index ) {
 338                                         if ( $index === NS_PROJECT_TALK ) {
 339                                                 unset( $aliases[$name] );
 340                                                 $name = $this->fixVariableInNamespace( $name );
 341                                                 $aliases[$name] = $index;
 342                                         }
 343                                 }
 344                         }
 345                         $this->namespaceAliases = $aliases;
 346                 }
 347                 return $this->namespaceAliases;
 348         }
 349
 350         function getNamespaceIds() {
 351                 if ( is_null( $this->mNamespaceIds ) ) {
 352                         global $wgNamespaceAliases;
 353                         # Put namespace names and aliases into a hashtable.
 354                         # If this is too slow, then we should arrange it so that it is done
 355                         # before caching. The catch is that at pre-cache time, the above
 356                         # class-specific fixup hasn't been done.
 357                         $this->mNamespaceIds = array();
 358                         foreach ( $this->getNamespaces() as $index => $name ) {
 359                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 360                         }
 361                         foreach ( $this->getNamespaceAliases() as $name => $index ) {
 362                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 363                         }
 364                         if ( $wgNamespaceAliases ) {
 365                                 foreach ( $wgNamespaceAliases as $name => $index ) {
 366                                         $this->mNamespaceIds[$this->lc( $name )] = $index;
 367                                 }
 368                         }
 369                 }
 370                 return $this->mNamespaceIds;
 371         }
 372
 373
 374         /**
 375          * Get a namespace key by value, case insensitive.  Canonical namespace
 376          * names override custom ones defined for the current language.
 377          *
 378          * @param $text String
 379          * @return mixed An integer if $text is a valid value otherwise false
 380          */
 381         function getNsIndex( $text ) {
 382                 $lctext = $this->lc( $text );
 383                 if ( ( $ns = MWNamespace::getCanonicalIndex( $lctext ) ) !== null ) {
 384                         return $ns;
 385                 }
 386                 $ids = $this->getNamespaceIds();
 387                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 388         }
 389
 390         /**
 391          * short names for language variants used for language conversion links.
 392          *
 393          * @param $code String
 394          * @return string
 395          */
 396         function getVariantname( $code ) {
 397                 return $this->getMessageFromDB( "variantname-$code" );
 398         }
 399
 400         function specialPage( $name ) {
 401                 $aliases = $this->getSpecialPageAliases();
 402                 if ( isset( $aliases[$name][0] ) ) {
 403                         $name = $aliases[$name][0];
 404                 }
 405                 return $this->getNsText( NS_SPECIAL ) . ':' . $name;
 406         }
 407
 408         function getQuickbarSettings() {
 409                 return array(
 410                         $this->getMessage( 'qbsettings-none' ),
 411                         $this->getMessage( 'qbsettings-fixedleft' ),
 412                         $this->getMessage( 'qbsettings-fixedright' ),
 413                         $this->getMessage( 'qbsettings-floatingleft' ),
 414                         $this->getMessage( 'qbsettings-floatingright' )
 415                 );
 416         }
 417
 418         function getMathNames() {
 419                 return self::$dataCache->getItem( $this->mCode, 'mathNames' );
 420         }
 421
 422         function getDatePreferences() {
 423                 return self::$dataCache->getItem( $this->mCode, 'datePreferences' );
 424         }
 425
 426         function getDateFormats() {
 427                 return self::$dataCache->getItem( $this->mCode, 'dateFormats' );
 428         }
 429
 430         function getDefaultDateFormat() {
 431                 $df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
 432                 if ( $df === 'dmy or mdy' ) {
 433                         global $wgAmericanDates;
 434                         return $wgAmericanDates ? 'mdy' : 'dmy';
 435                 } else {
 436                         return $df;
 437                 }
 438         }
 439
 440         function getDatePreferenceMigrationMap() {
 441                 return self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
 442         }
 443
 444         function getImageFile( $image ) {
 445                 return self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
 446         }
 447
 448         function getDefaultUserOptionOverrides() {
 449                 return self::$dataCache->getItem( $this->mCode, 'defaultUserOptionOverrides' );
 450         }
 451
 452         function getExtraUserToggles() {
 453                 return self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
 454         }
 455
 456         function getUserToggle( $tog ) {
 457                 return $this->getMessageFromDB( "tog-$tog" );
 458         }
 459
 460         /**
 461          * Get language names, indexed by code.
 462          * If $customisedOnly is true, only returns codes with a messages file
 463          */
 464         public static function getLanguageNames( $customisedOnly = false ) {
 465                 global $wgLanguageNames, $wgExtraLanguageNames;
 466                 $allNames = $wgExtraLanguageNames + $wgLanguageNames;
 467                 if ( !$customisedOnly ) {
 468                         return $allNames;
 469                 }
 470
 471                 global $IP;
 472                 $names = array();
 473                 $dir = opendir( "$IP/languages/messages" );
 474                 while ( false !== ( $file = readdir( $dir ) ) ) {
 475                         $code = self::getCodeFromFileName( $file, 'Messages' );
 476                         if ( $code && isset( $allNames[$code] ) ) {
 477                                 $names[$code] = $allNames[$code];
 478                         }
 479                 }
 480                 closedir( $dir );
 481                 return $names;
 482         }
 483
 484         /**
 485          * Get a message from the MediaWiki namespace.
 486          *
 487          * @param $msg String: message name
 488          * @return string
 489          */
 490         function getMessageFromDB( $msg ) {
 491                 return wfMsgExt( $msg, array( 'parsemag', 'language' => $this ) );
 492         }
 493
 494         function getLanguageName( $code ) {
 495                 $names = self::getLanguageNames();
 496                 if ( !array_key_exists( $code, $names ) ) {
 497                         return '';
 498                 }
 499                 return $names[$code];
 500         }
 501
 502         function getMonthName( $key ) {
 503                 return $this->getMessageFromDB( self::$mMonthMsgs[$key - 1] );
 504         }
 505
 506         function getMonthNameGen( $key ) {
 507                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key - 1] );
 508         }
 509
 510         function getMonthAbbreviation( $key ) {
 511                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key - 1] );
 512         }
 513
 514         function getWeekdayName( $key ) {
 515                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key - 1] );
 516         }
 517
 518         function getWeekdayAbbreviation( $key ) {
 519                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key - 1] );
 520         }
 521
 522         function getIranianCalendarMonthName( $key ) {
 523                 return $this->getMessageFromDB( self::$mIranianCalendarMonthMsgs[$key - 1] );
 524         }
 525
 526         function getHebrewCalendarMonthName( $key ) {
 527                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthMsgs[$key - 1] );
 528         }
 529
 530         function getHebrewCalendarMonthNameGen( $key ) {
 531                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthGenMsgs[$key - 1] );
 532         }
 533
 534         function getHijriCalendarMonthName( $key ) {
 535                 return $this->getMessageFromDB( self::$mHijriCalendarMonthMsgs[$key - 1] );
 536         }
 537
 538         /**
 539          * Used by date() and time() to adjust the time output.
 540          *
 541          * @param $ts Int the time in date('YmdHis') format
 542          * @param $tz Mixed: adjust the time by this amount (default false, meaning
 543          *            the user's timecorrection setting is used)
 544          * @return mixed the adjusted time in date('YmdHis') format, or $ts unchanged when the offset is zero
 545          */
 546         function userAdjust( $ts, $tz = false ) {
 547                 global $wgUser, $wgLocalTZoffset;
 548
 549                 if ( $tz === false ) {
 550                         $tz = $wgUser->getOption( 'timecorrection' );
 551                 }
 552
 553                 $data = explode( '|', $tz, 3 );
 554
 555                 if ( $data[0] == 'ZoneInfo' ) {
 556                         if ( function_exists( 'timezone_open' ) && @timezone_open( $data[2] ) !== false ) {
 557                                 $date = date_create( $ts, timezone_open( 'UTC' ) );
 558                                 date_timezone_set( $date, timezone_open( $data[2] ) );
 559                                 $date = date_format( $date, 'YmdHis' );
 560                                 return $date;
 561                         }
 562                         # Unrecognized timezone, default to 'Offset' with the stored offset.
 563                         $data[0] = 'Offset';
 564                 }
 565
 566                 $minDiff = 0;
 567                 if ( $data[0] == 'System' || $tz == '' ) {
 568                         #  Global offset in minutes.
 569                         if ( isset( $wgLocalTZoffset ) ) {
 570                                 $minDiff = $wgLocalTZoffset;
 571                         }
 572                 } else if ( $data[0] == 'Offset' ) {
 573                         $minDiff = intval( $data[1] );
 574                 } else {
 575                         $data = explode( ':', $tz );
 576                         if ( count( $data ) == 2 ) {
 577                                 $data[0] = intval( $data[0] );
 578                                 $data[1] = intval( $data[1] );
 579                                 $minDiff = abs( $data[0] ) * 60 + $data[1];
 580                                 if ( $data[0] < 0 ) {
 581                                         $minDiff = -$minDiff;
 582                                 }
 583                         } else {
 584                                 $minDiff = intval( $data[0] ) * 60;
 585                         }
 586                 }
 587
 588                 # No difference ? Return time unchanged
 589                 if ( 0 == $minDiff ) {
 590                         return $ts;
 591                 }
 592
 593                 wfSuppressWarnings(); // E_STRICT system time bitching
 594                 # Generate an adjusted date; take advantage of the fact that mktime
 595                 # will normalize out-of-range values so we don't have to split $minDiff
 596                 # into hours and minutes.
 597                 $t = mktime( (
 598                   (int)substr( $ts, 8, 2 ) ), # Hours
 599                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
 600                   (int)substr( $ts, 12, 2 ), # Seconds
 601                   (int)substr( $ts, 4, 2 ), # Month
 602                   (int)substr( $ts, 6, 2 ), # Day
 603                   (int)substr( $ts, 0, 4 ) ); # Year
 604
 605                 $date = date( 'YmdHis', $t );
 606                 wfRestoreWarnings();
 607
 608                 return $date;
 609         }
 610
 611         /**
 612          * This is a workalike of PHP's date() function, but with better
 613          * internationalisation, a reduced set of format characters, and a better
 614          * escaping format.
 615          *
 616          * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
 617          * PHP manual for definitions. "o" format character is supported since
 618          * PHP 5.1.0, previous versions return literal o.
 619          * There are a number of extensions, which start with "x":
 620          *
 621          *    xn   Do not translate digits of the next numeric format character
 622          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 623          *    xr   Use roman numerals for the next numeric format character
 624          *    xh   Use hebrew numerals for the next numeric format character
 625          *    xx   Literal x
 626          *    xg   Genitive month name
 627          *
 628          *    xij  j (day number) in Iranian calendar
 629          *    xiF  F (month name) in Iranian calendar
 630          *    xin  n (month number) in Iranian calendar
 631          *    xiY  Y (full year) in Iranian calendar
 632          *
 633          *    xjj  j (day number) in Hebrew calendar
 634          *    xjF  F (month name) in Hebrew calendar
 635          *    xjt  t (days in month) in Hebrew calendar
 636          *    xjx  xg (genitive month name) in Hebrew calendar
 637          *    xjn  n (month number) in Hebrew calendar
 638          *    xjY  Y (full year) in Hebrew calendar
 639          *
 640          *    xmj  j (day number) in Hijri calendar
 641          *    xmF  F (month name) in Hijri calendar
 642          *    xmn  n (month number) in Hijri calendar
 643          *    xmY  Y (full year) in Hijri calendar
 644          *
 645          *    xkY  Y (full year) in Thai solar calendar. Months and days are
 646          *                       identical to the Gregorian calendar
 647          *    xoY  Y (full year) in Minguo calendar or Juche year.
 648          *                       Months and days are identical to the
 649          *                       Gregorian calendar
 650          *    xtY  Y (full year) in Japanese nengo. Months and days are
 651          *                       identical to the Gregorian calendar
 652          *
 653          * Characters enclosed in double quotes will be considered literal (with
 654          * the quotes themselves removed). Unmatched quotes will be considered
 655          * literal quotes. Example:
 656          *
 657          * "The month is" F       => The month is January
 658          * i's"                   => 20'11"
 659          *
 660          * Backslash escaping is also supported.
 661          *
 662          * Input timestamp is assumed to be pre-normalized to the desired local
 663          * time zone, if any.
 664          *
 665          * @param $format String
 666          * @param $ts String: 14-character timestamp
 667          *      YYYYMMDDHHMMSS
 668          *      01234567890123
 669          * @todo emulation of "o" format character for PHP pre 5.1.0
 670          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 671          */
 672         function sprintfDate( $format, $ts ) {
 673                 $s = '';
 674                 $raw = false;
 675                 $roman = false;
 676                 $hebrewNum = false;
 677                 $unix = false;
 678                 $rawToggle = false;
 679                 $iranian = false;
 680                 $hebrew = false;
 681                 $hijri = false;
 682                 $thai = false;
 683                 $minguo = false;
 684                 $tenno = false;
 685                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
 686                         $num = false;
 687                         $code = $format[$p];
 688                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
 689                                 $code .= $format[++$p];
 690                         }
 691
 692                         if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < strlen( $format ) - 1 ) {
 693                                 $code .= $format[++$p];
 694                         }
 695
 696                         switch ( $code ) {
 697                                 case 'xx':
 698                                         $s .= 'x';
 699                                         break;
 700                                 case 'xn':
 701                                         $raw = true;
 702                                         break;
 703                                 case 'xN':
 704                                         $rawToggle = !$rawToggle;
 705                                         break;
 706                                 case 'xr':
 707                                         $roman = true;
 708                                         break;
 709                                 case 'xh':
 710                                         $hebrewNum = true;
 711                                         break;
 712                                 case 'xg':
 713                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
 714                                         break;
 715                                 case 'xjx':
 716                                         if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts );
 717                                         $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
 718                                         break;
 719                                 case 'd':
 720                                         $num = substr( $ts, 6, 2 );
 721                                         break;
 722                                 case 'D':
 723                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
 724                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
 725                                         break;
 726                                 case 'j':
 727                                         $num = intval( substr( $ts, 6, 2 ) );
 728                                         break;
 729                                 case 'xij':
 730                                         if ( !$iranian ) {
 731                                                 $iranian = self::tsToIranian( $ts );
 732                                         }
 733                                         $num = $iranian[2];
 734                                         break;
 735                                 case 'xmj':
 736                                         if ( !$hijri ) {
 737                                                 $hijri = self::tsToHijri( $ts );
 738                                         }
 739                                         $num = $hijri[2];
 740                                         break;
 741                                 case 'xjj':
 742                                         if ( !$hebrew ) {
 743                                                 $hebrew = self::tsToHebrew( $ts );
 744                                         }
 745                                         $num = $hebrew[2];
 746                                         break;
 747                                 case 'l':
 748                                         if ( !$unix ) {
 749                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 750                                         }
 751                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
 752                                         break;
 753                                 case 'N':
 754                                         if ( !$unix ) {
 755                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 756                                         }
 757                                         $w = gmdate( 'w', $unix );
 758                                         $num = $w ? $w : 7;
 759                                         break;
 760                                 case 'w':
 761                                         if ( !$unix ) {
 762                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 763                                         }
 764                                         $num = gmdate( 'w', $unix );
 765                                         break;
 766                                 case 'z':
 767                                         if ( !$unix ) {
 768                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 769                                         }
 770                                         $num = gmdate( 'z', $unix );
 771                                         break;
 772                                 case 'W':
 773                                         if ( !$unix ) {
 774                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 775                                         }
 776                                         $num = gmdate( 'W', $unix );
 777                                         break;
 778                                 case 'F':
 779                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
 780                                         break;
 781                                 case 'xiF':
 782                                         if ( !$iranian ) {
 783                                                 $iranian = self::tsToIranian( $ts );
 784                                         }
 785                                         $s .= $this->getIranianCalendarMonthName( $iranian[1] );
 786                                         break;
 787                                 case 'xmF':
 788                                         if ( !$hijri ) {
 789                                                 $hijri = self::tsToHijri( $ts );
 790                                         }
 791                                         $s .= $this->getHijriCalendarMonthName( $hijri[1] );
 792                                         break;
 793                                 case 'xjF':
 794                                         if ( !$hebrew ) {
 795                                                 $hebrew = self::tsToHebrew( $ts );
 796                                         }
 797                                         $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
 798                                         break;
 799                                 case 'm':
 800                                         $num = substr( $ts, 4, 2 );
 801                                         break;
 802                                 case 'M':
 803                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
 804                                         break;
 805                                 case 'n':
 806                                         $num = intval( substr( $ts, 4, 2 ) );
 807                                         break;
 808                                 case 'xin':
 809                                         if ( !$iranian ) {
 810                                                 $iranian = self::tsToIranian( $ts );
 811                                         }
 812                                         $num = $iranian[1];
 813                                         break;
 814                                 case 'xmn':
 815                                         if ( !$hijri ) {
 816                                                 $hijri = self::tsToHijri ( $ts );
 817                                         }
 818                                         $num = $hijri[1];
 819                                         break;
 820                                 case 'xjn':
 821                                         if ( !$hebrew ) {
 822                                                 $hebrew = self::tsToHebrew( $ts );
 823                                         }
 824                                         $num = $hebrew[1];
 825                                         break;
 826                                 case 't':
 827                                         if ( !$unix ) {
 828                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 829                                         }
 830                                         $num = gmdate( 't', $unix );
 831                                         break;
 832                                 case 'xjt':
 833                                         if ( !$hebrew ) {
 834                                                 $hebrew = self::tsToHebrew( $ts );
 835                                         }
 836                                         $num = $hebrew[3];
 837                                         break;
 838                                 case 'L':
 839                                         if ( !$unix ) {
 840                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 841                                         }
 842                                         $num = gmdate( 'L', $unix );
 843                                         break;
 844                                 # 'o' is supported since PHP 5.1.0
 845                                 # return literal if not supported
 846                                 # TODO: emulation for pre 5.1.0 versions
 847                                 case 'o':
 848                                         if ( !$unix ) {
 849                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 850                                         }
 851                                         if ( version_compare( PHP_VERSION, '5.1.0' ) === 1 ) {
 852                                                 $num = date( 'o', $unix );
 853                                         } else {
 854                                                 $s .= 'o';
 855                                         }
 856                                         break;
 857                                 case 'Y':
 858                                         $num = substr( $ts, 0, 4 );
 859                                         break;
 860                                 case 'xiY':
 861                                         if ( !$iranian ) {
 862                                                 $iranian = self::tsToIranian( $ts );
 863                                         }
 864                                         $num = $iranian[0];
 865                                         break;
 866                                 case 'xmY':
 867                                         if ( !$hijri ) {
 868                                                 $hijri = self::tsToHijri( $ts );
 869                                         }
 870                                         $num = $hijri[0];
 871                                         break;
 872                                 case 'xjY':
 873                                         if ( !$hebrew ) {
 874                                                 $hebrew = self::tsToHebrew( $ts );
 875                                         }
 876                                         $num = $hebrew[0];
 877                                         break;
 878                                 case 'xkY':
 879                                         if ( !$thai ) {
 880                                                 $thai = self::tsToYear( $ts, 'thai' );
 881                                         }
 882                                         $num = $thai[0];
 883                                         break;
 884                                 case 'xoY':
 885                                         if ( !$minguo ) {
 886                                                 $minguo = self::tsToYear( $ts, 'minguo' );
 887                                         }
 888                                         $num = $minguo[0];
 889                                         break;
 890                                 case 'xtY':
 891                                         if ( !$tenno ) {
 892                                                 $tenno = self::tsToYear( $ts, 'tenno' );
 893                                         }
 894                                         $num = $tenno[0];
 895                                         break;
 896                                 case 'y':
 897                                         $num = substr( $ts, 2, 2 );
 898                                         break;
 899                                 case 'a':
 900                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
 901                                         break;
 902                                 case 'A':
 903                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
 904                                         break;
 905                                 case 'g':
 906                                         $h = substr( $ts, 8, 2 );
 907                                         $num = $h % 12 ? $h % 12 : 12;
 908                                         break;
 909                                 case 'G':
 910                                         $num = intval( substr( $ts, 8, 2 ) );
 911                                         break;
 912                                 case 'h':
 913                                         $h = substr( $ts, 8, 2 );
 914                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
 915                                         break;
 916                                 case 'H':
 917                                         $num = substr( $ts, 8, 2 );
 918                                         break;
 919                                 case 'i':
 920                                         $num = substr( $ts, 10, 2 );
 921                                         break;
 922                                 case 's':
 923                                         $num = substr( $ts, 12, 2 );
 924                                         break;
 925                                 case 'c':
 926                                         if ( !$unix ) {
 927                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 928                                         }
 929                                         $s .= gmdate( 'c', $unix );
 930                                         break;
 931                                 case 'r':
 932                                         if ( !$unix ) {
 933                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 934                                         }
 935                                         $s .= gmdate( 'r', $unix );
 936                                         break;
 937                                 case 'U':
 938                                         if ( !$unix ) {
 939                                                 $unix = wfTimestamp( TS_UNIX, $ts );
 940                                         }
 941                                         $num = $unix;
 942                                         break;
 943                                 case '\\':
 944                                         # Backslash escaping
 945                                         if ( $p < strlen( $format ) - 1 ) {
 946                                                 $s .= $format[++$p];
 947                                         } else {
 948                                                 $s .= '\\';
 949                                         }
 950                                         break;
 951                                 case '"':
 952                                         # Quoted literal
 953                                         if ( $p < strlen( $format ) - 1 ) {
 954                                                 $endQuote = strpos( $format, '"', $p + 1 );
 955                                                 if ( $endQuote === false ) {
 956                                                         # No terminating quote, assume literal "
 957                                                         $s .= '"';
 958                                                 } else {
 959                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
 960                                                         $p = $endQuote;
 961                                                 }
 962                                         } else {
 963                                                 # Quote at end of string, assume literal "
 964                                                 $s .= '"';
 965                                         }
 966                                         break;
 967                                 default:
 968                                         $s .= $format[$p];
 969                         }
 970                         if ( $num !== false ) {
 971                                 if ( $rawToggle || $raw ) {
 972                                         $s .= $num;
 973                                         $raw = false;
 974                                 } elseif ( $roman ) {
 975                                         $s .= self::romanNumeral( $num );
 976                                         $roman = false;
 977                                 } elseif ( $hebrewNum ) {
 978                                         $s .= self::hebrewNumeral( $num );
 979                                         $hebrewNum = false;
 980                                 } else {
 981                                         $s .= $this->formatNum( $num, true );
 982                                 }
 983                                 $num = false;
 984                         }
 985                 }
 986                 return $s;
 987         }
 988
 989         private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
 990         private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
 991         /**
 992          * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 993          * Gregorian dates to Iranian dates. Originally written in C, it
 994          * is released under the terms of GNU Lesser General Public
 995          * License. Conversion to PHP was performed by Niklas Laxström.
 996          *
 997          * Link: http://www.farsiweb.info/jalali/jalali.c
 998          */
 999         private static function tsToIranian( $ts ) {
1000                 $gy = substr( $ts, 0, 4 ) -1600;
1001                 $gm = substr( $ts, 4, 2 ) -1;
1002                 $gd = substr( $ts, 6, 2 ) -1;
1003
1004                 # Days passed from the beginning (including leap years)
1005                 $gDayNo = 365 * $gy
1006                         + floor( ( $gy + 3 ) / 4 )
1007                         - floor( ( $gy + 99 ) / 100 )
1008                         + floor( ( $gy + 399 ) / 400 );
1009
1010
1011                 // Add days of the past months of this year
1012                 for ( $i = 0; $i < $gm; $i++ ) {
1013                         $gDayNo += self::$GREG_DAYS[$i];
1014                 }
1015
1016                 // Leap years
1017                 if ( $gm > 1 && ( ( $gy % 4 === 0 && $gy % 100 !== 0 || ( $gy % 400 == 0 ) ) ) ) {
1018                         $gDayNo++;
1019                 }
1020
1021                 // Days passed in current month
1022                 $gDayNo += $gd;
1023
1024                 $jDayNo = $gDayNo - 79;
1025
1026                 $jNp = floor( $jDayNo / 12053 );
1027                 $jDayNo %= 12053;
1028
1029                 $jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
1030                 $jDayNo %= 1461;
1031
1032                 if ( $jDayNo >= 366 ) {
1033                         $jy += floor( ( $jDayNo - 1 ) / 365 );
1034                         $jDayNo = floor( ( $jDayNo - 1 ) % 365 );
1035                 }
1036
1037                 for ( $i = 0; $i < 11 && $jDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
1038                         $jDayNo -= self::$IRANIAN_DAYS[$i];
1039                 }
1040
1041                 $jm = $i + 1;
1042                 $jd = $jDayNo + 1;
1043
1044                 return array( $jy, $jm, $jd );
1045         }
1046
1047         /**
1048          * Converting Gregorian dates to Hijri dates.
1049          *
1050          * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1051          *
1052          * @link http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1053          */
1054         private static function tsToHijri( $ts ) {
1055                 $year = substr( $ts, 0, 4 );
1056                 $month = substr( $ts, 4, 2 );
1057                 $day = substr( $ts, 6, 2 );
1058
1059                 $zyr = $year;
1060                 $zd = $day;
1061                 $zm = $month;
1062                 $zy = $zyr;
1063
1064                 if (
1065                         ( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1066                         ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1067                 )
1068                 {
1069                         $zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1070                                         (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1071                                         (int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1072                                         $zd - 32075;
1073                 } else {
1074                         $zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1075                                                                 (int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
1076                 }
1077
1078                 $zl = $zjd -1948440 + 10632;
1079                 $zn = (int)( ( $zl - 1 ) / 10631 );
1080                 $zl = $zl - 10631 * $zn + 354;
1081                 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1082                 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
1083                 $zm = (int)( ( 24 * $zl ) / 709 );
1084                 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1085                 $zy = 30 * $zn + $zj - 30;
1086
1087                 return array( $zy, $zm, $zd );
1088         }
1089
1090         /**
1091          * Converting Gregorian dates to Hebrew dates.
1092          *
1093          * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1094          * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1095          * to translate the relevant functions into PHP and release them under
1096          * GNU GPL.
1097          *
1098          * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1099          * and Adar II is 14. In a non-leap year, Adar is 6.
1100          */
1101         private static function tsToHebrew( $ts ) {
1102                 # Parse date
1103                 $year = substr( $ts, 0, 4 );
1104                 $month = substr( $ts, 4, 2 );
1105                 $day = substr( $ts, 6, 2 );
1106
1107                 # Calculate Hebrew year
1108                 $hebrewYear = $year + 3760;
1109
1110                 # Month number when September = 1, August = 12
1111                 $month += 4;
1112                 if ( $month > 12 ) {
1113                         # Next year
1114                         $month -= 12;
1115                         $year++;
1116                         $hebrewYear++;
1117                 }
1118
1119                 # Calculate day of year from 1 September
1120                 $dayOfYear = $day;
1121                 for ( $i = 1; $i < $month; $i++ ) {
1122                         if ( $i == 6 ) {
1123                                 # February
1124                                 $dayOfYear += 28;
1125                                 # Check if the year is leap
1126                                 if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
1127                                         $dayOfYear++;
1128                                 }
1129                         } elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
1130                                 $dayOfYear += 30;
1131                         } else {
1132                                 $dayOfYear += 31;
1133                         }
1134                 }
1135
1136                 # Calculate the start of the Hebrew year
1137                 $start = self::hebrewYearStart( $hebrewYear );
1138
1139                 # Calculate next year's start
1140                 if ( $dayOfYear <= $start ) {
1141                         # Day is before the start of the year - it is the previous year
1142                         # Next year's start
1143                         $nextStart = $start;
1144                         # Previous year
1145                         $year--;
1146                         $hebrewYear--;
1147                         # Add days since previous year's 1 September
1148                         $dayOfYear += 365;
1149                         if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1150                                 # Leap year
1151                                 $dayOfYear++;
1152                         }
1153                         # Start of the new (previous) year
1154                         $start = self::hebrewYearStart( $hebrewYear );
1155                 } else {
1156                         # Next year's start
1157                         $nextStart = self::hebrewYearStart( $hebrewYear + 1 );
1158                 }
1159
1160                 # Calculate Hebrew day of year
1161                 $hebrewDayOfYear = $dayOfYear - $start;
1162
1163                 # Difference between year's days
1164                 $diff = $nextStart - $start;
1165                 # Add 12 (or 13 for leap years) days to ignore the difference between
1166                 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1167                 # difference is only about the year type
1168                 if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1169                         $diff += 13;
1170                 } else {
1171                         $diff += 12;
1172                 }
1173
1174                 # Check the year pattern, and is leap year
1175                 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1176                 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1177                 # and non-leap years
1178                 $yearPattern = $diff % 30;
1179                 # Check if leap year
1180                 $isLeap = $diff >= 30;
1181
1182                 # Calculate day in the month from number of day in the Hebrew year
1183                 # Don't check Adar - if the day is not in Adar, we will stop before;
1184                 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1185                 $hebrewDay = $hebrewDayOfYear;
1186                 $hebrewMonth = 1;
1187                 $days = 0;
1188                 while ( $hebrewMonth <= 12 ) {
1189                         # Calculate days in this month
1190                         if ( $isLeap && $hebrewMonth == 6 ) {
1191                                 # Adar in a leap year
1192                                 if ( $isLeap ) {
1193                                         # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1194                                         $days = 30;
1195                                         if ( $hebrewDay <= $days ) {
1196                                                 # Day in Adar I
1197                                                 $hebrewMonth = 13;
1198                                         } else {
1199                                                 # Subtract the days of Adar I
1200                                                 $hebrewDay -= $days;
1201                                                 # Try Adar II
1202                                                 $days = 29;
1203                                                 if ( $hebrewDay <= $days ) {
1204                                                         # Day in Adar II
1205                                                         $hebrewMonth = 14;
1206                                                 }
1207                                         }
1208                                 }
1209                         } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1210                                 # Cheshvan in a complete year (otherwise as the rule below)
1211                                 $days = 30;
1212                         } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1213                                 # Kislev in an incomplete year (otherwise as the rule below)
1214                                 $days = 29;
1215                         } else {
1216                                 # Odd months have 30 days, even have 29
1217                                 $days = 30 - ( $hebrewMonth - 1 ) % 2;
1218                         }
1219                         if ( $hebrewDay <= $days ) {
1220                                 # In the current month
1221                                 break;
1222                         } else {
1223                                 # Subtract the days of the current month
1224                                 $hebrewDay -= $days;
1225                                 # Try in the next month
1226                                 $hebrewMonth++;
1227                         }
1228                 }
1229
1230                 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1231         }
1232
1233         /**
1234          * This calculates the Hebrew year start, as days since 1 September.
1235          * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1236          * Used for Hebrew date.
1237          */
1238         private static function hebrewYearStart( $year ) {
1239                 $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
1240                 $b = intval( ( $year - 1 ) % 4 );
1241                 $m = 32.044093161144 + 1.5542417966212 * $a +  $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1242                 if ( $m < 0 ) {
1243                         $m--;
1244                 }
1245                 $Mar = intval( $m );
1246                 if ( $m < 0 ) {
1247                         $m++;
1248                 }
1249                 $m -= $Mar;
1250
1251                 $c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
1252                 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1253                         $Mar++;
1254                 } else if ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1255                         $Mar += 2;
1256                 } else if ( $c == 2 || $c == 4 || $c == 6 ) {
1257                         $Mar++;
1258                 }
1259
1260                 $Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1261                 return $Mar;
1262         }
1263
1264         /**
         * Algorithm to convert Gregorian dates to Thai solar dates,
         * Minguo dates or Japanese nengō dates.
1267          *
1268          * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1269          *       http://en.wikipedia.org/wiki/Minguo_calendar
1270          *       http://en.wikipedia.org/wiki/Japanese_era_name
1271          *
1272          * @param $ts String: 14-character timestamp
         * @param $cName String: calendar name
1274          * @return Array: converted year, month, day
1275          */
1276         private static function tsToYear( $ts, $cName ) {
1277                 $gy = substr( $ts, 0, 4 );
1278                 $gm = substr( $ts, 4, 2 );
1279                 $gd = substr( $ts, 6, 2 );
1280
1281                 if ( !strcmp( $cName, 'thai' ) ) {
1282                         # Thai solar dates
1283                         # Add 543 years to the Gregorian calendar
1284                         # Months and days are identical
1285                         $gy_offset = $gy + 543;
1286                 } else if ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
1287                         # Minguo dates
1288                         # Deduct 1911 years from the Gregorian calendar
1289                         # Months and days are identical
1290                         $gy_offset = $gy - 1911;
1291                 } else if ( !strcmp( $cName, 'tenno' ) ) {
1292                         # Nengō dates up to Meiji period
1293                         # Deduct years from the Gregorian calendar
1294                         # depending on the nengo periods
1295                         # Months and days are identical
1296                         if ( ( $gy < 1912 ) || ( ( $gy == 1912 ) && ( $gm < 7 ) ) || ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1297                                 # Meiji period
1298                                 $gy_gannen = $gy - 1868 + 1;
1299                                 $gy_offset = $gy_gannen;
1300                                 if ( $gy_gannen == 1 ) {
1301                                         $gy_offset = '元';
1302                                 }
1303                                 $gy_offset = '明治' . $gy_offset;
1304                         } else if (
1305                                 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1306                                 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1307                                 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1308                                 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1309                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1310                         )
1311                         {
1312                                 # Taishō period
1313                                 $gy_gannen = $gy - 1912 + 1;
1314                                 $gy_offset = $gy_gannen;
1315                                 if ( $gy_gannen == 1 ) {
1316                                         $gy_offset = '元';
1317                                 }
1318                                 $gy_offset = '大正' . $gy_offset;
1319                         } else if (
1320                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1321                                 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1322                                 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1323                         )
1324                         {
1325                                 # Shōwa period
1326                                 $gy_gannen = $gy - 1926 + 1;
1327                                 $gy_offset = $gy_gannen;
1328                                 if ( $gy_gannen == 1 ) {
1329                                         $gy_offset = '元';
1330                                 }
1331                                 $gy_offset = '昭和' . $gy_offset;
1332                         } else {
1333                                 # Heisei period
1334                                 $gy_gannen = $gy - 1989 + 1;
1335                                 $gy_offset = $gy_gannen;
1336                                 if ( $gy_gannen == 1 ) {
1337                                         $gy_offset = '元';
1338                                 }
1339                                 $gy_offset = '平成' . $gy_offset;
1340                         }
1341                 } else {
1342                         $gy_offset = $gy;
1343                 }
1344
1345                 return array( $gy_offset, $gm, $gd );
1346         }
1347
1348         /**
1349          * Roman number formatting up to 3000
1350          */
1351         static function romanNumeral( $num ) {
1352                 static $table = array(
1353                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
1354                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
1355                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
1356                         array( '', 'M', 'MM', 'MMM' )
1357                 );
1358
1359                 $num = intval( $num );
1360                 if ( $num > 3000 || $num <= 0 ) {
1361                         return $num;
1362                 }
1363
1364                 $s = '';
1365                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1366                         if ( $num >= $pow10 ) {
1367                                 $s .= $table[$i][floor( $num / $pow10 )];
1368                         }
1369                         $num = $num % $pow10;
1370                 }
1371                 return $s;
1372         }
1373
1374         /**
1375          * Hebrew Gematria number formatting up to 9999
1376          */
1377         static function hebrewNumeral( $num ) {
1378                 static $table = array(
1379                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
1380                         array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
1381                         array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
1382                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
1383                 );
1384
1385                 $num = intval( $num );
1386                 if ( $num > 9999 || $num <= 0 ) {
1387                         return $num;
1388                 }
1389
1390                 $s = '';
1391                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1392                         if ( $num >= $pow10 ) {
1393                                 if ( $num == 15 || $num == 16 ) {
1394                                         $s .= $table[0][9] . $table[0][$num - 9];
1395                                         $num = 0;
1396                                 } else {
1397                                         $s .= $table[$i][intval( ( $num / $pow10 ) )];
1398                                         if ( $pow10 == 1000 ) {
1399                                                 $s .= "'";
1400                                         }
1401                                 }
1402                         }
1403                         $num = $num % $pow10;
1404                 }
1405                 if ( strlen( $s ) == 2 ) {
1406                         $str = $s . "'";
1407                 } else  {
1408                         $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
1409                         $str .= substr( $s, strlen( $s ) - 2, 2 );
1410                 }
1411                 $start = substr( $str, 0, strlen( $str ) - 2 );
1412                 $end = substr( $str, strlen( $str ) - 2 );
1413                 switch( $end ) {
1414                         case 'כ':
1415                                 $str = $start . 'ך';
1416                                 break;
1417                         case 'מ':
1418                                 $str = $start . 'ם';
1419                                 break;
1420                         case 'נ':
1421                                 $str = $start . 'ן';
1422                                 break;
1423                         case 'פ':
1424                                 $str = $start . 'ף';
1425                                 break;
1426                         case 'צ':
1427                                 $str = $start . 'ץ';
1428                                 break;
1429                 }
1430                 return $str;
1431         }
1432
1433         /**
              * This is meant to be used by time(), date(), and timeanddate() to get
              * the date preference they are supposed to use. Overrides of those
              * methods in child classes should call it as well.
1437          *
1438          *<code>
1439          * function timeanddate([...], $format = true) {
1440          *      $datePreference = $this->dateFormat($format);
1441          * [...]
1442          * }
1443          *</code>
1444          *
1445          * @param $usePrefs Mixed: if true, the user's preference is used
1446          *                         if false, the site/language default is used
1447          *                         if int/string, assumed to be a format.
1448          * @return string
1449          */
1450         function dateFormat( $usePrefs = true ) {
1451                 global $wgUser;
1452
1453                 if ( is_bool( $usePrefs ) ) {
1454                         if ( $usePrefs ) {
1455                                 $datePreference = $wgUser->getDatePreference();
1456                         } else {
1457                                 $datePreference = (string)User::getDefaultOption( 'date' );
1458                         }
1459                 } else {
1460                         $datePreference = (string)$usePrefs;
1461                 }
1462
1463                 // return int
1464                 if ( $datePreference == '' ) {
1465                         return 'default';
1466                 }
1467
1468                 return $datePreference;
1469         }
1470
1471         /**
1472          * Get a format string for a given type and preference
1473          * @param $type May be date, time or both
1474          * @param $pref The format name as it appears in Messages*.php
1475          */
1476         function getDateFormatString( $type, $pref ) {
1477                 if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
1478                         if ( $pref == 'default' ) {
1479                                 $pref = $this->getDefaultDateFormat();
1480                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1481                         } else {
1482                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1483                                 if ( is_null( $df ) ) {
1484                                         $pref = $this->getDefaultDateFormat();
1485                                         $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
1486                                 }
1487                         }
1488                         $this->dateFormatStrings[$type][$pref] = $df;
1489                 }
1490                 return $this->dateFormatStrings[$type][$pref];
1491         }
1492
1493         /**
1494          * @param $ts Mixed: the time format which needs to be turned into a
1495          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1496          * @param $adj Bool: whether to adjust the time output according to the
1497          *             user configured offset ($timecorrection)
1498          * @param $format Mixed: true to use user's date format preference
1499          * @param $timecorrection String: the time offset as returned by
1500          *                        validateTimeZone() in Special:Preferences
1501          * @return string
1502          */
1503         function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
1504                 if ( $adj ) {
1505                         $ts = $this->userAdjust( $ts, $timecorrection );
1506                 }
1507                 $df = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
1508                 return $this->sprintfDate( $df, $ts );
1509         }
1510
1511         /**
1512          * @param $ts Mixed: the time format which needs to be turned into a
1513          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1514          * @param $adj Bool: whether to adjust the time output according to the
1515          *             user configured offset ($timecorrection)
1516          * @param $format Mixed: true to use user's date format preference
1517          * @param $timecorrection String: the time offset as returned by
1518          *                        validateTimeZone() in Special:Preferences
1519          * @return string
1520          */
1521         function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
1522                 if ( $adj ) {
1523                         $ts = $this->userAdjust( $ts, $timecorrection );
1524                 }
1525                 $df = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
1526                 return $this->sprintfDate( $df, $ts );
1527         }
1528
1529         /**
1530          * @param $ts Mixed: the time format which needs to be turned into a
1531          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
1532          * @param $adj Bool: whether to adjust the time output according to the
1533          *             user configured offset ($timecorrection)
1534          * @param $format Mixed: what format to return, if it's false output the
1535          *                default one (default true)
1536          * @param $timecorrection String: the time offset as returned by
1537          *                        validateTimeZone() in Special:Preferences
1538          * @return string
1539          */
1540         function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
1541                 $ts = wfTimestamp( TS_MW, $ts );
1542                 if ( $adj ) {
1543                         $ts = $this->userAdjust( $ts, $timecorrection );
1544                 }
1545                 $df = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );
1546                 return $this->sprintfDate( $df, $ts );
1547         }
1548
1549         function getMessage( $key ) {
1550                 return self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
1551         }
1552
1553         function getAllMessages() {
1554                 return self::$dataCache->getItem( $this->mCode, 'messages' );
1555         }
1556
1557         function iconv( $in, $out, $string ) {
1558                 # This is a wrapper for iconv in all languages except esperanto,
1559                 # which does some nasty x-conversions beforehand
1560
1561                 # Even with //IGNORE iconv can whine about illegal characters in
1562                 # *input* string. We just ignore those too.
1563                 # REF: http://bugs.php.net/bug.php?id=37166
1564                 # REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
1565                 wfSuppressWarnings();
1566                 $text = iconv( $in, $out . '//IGNORE', $string );
1567                 wfRestoreWarnings();
1568                 return $text;
1569         }
1570
1571         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
1572         function ucwordbreaksCallbackAscii( $matches ) {
1573                 return $this->ucfirst( $matches[1] );
1574         }
1575
1576         function ucwordbreaksCallbackMB( $matches ) {
1577                 return mb_strtoupper( $matches[0] );
1578         }
1579
1580         function ucCallback( $matches ) {
1581                 list( $wikiUpperChars ) = self::getCaseMaps();
1582                 return strtr( $matches[1], $wikiUpperChars );
1583         }
1584
1585         function lcCallback( $matches ) {
1586                 list( , $wikiLowerChars ) = self::getCaseMaps();
1587                 return strtr( $matches[1], $wikiLowerChars );
1588         }
1589
1590         function ucwordsCallbackMB( $matches ) {
1591                 return mb_strtoupper( $matches[0] );
1592         }
1593
1594         function ucwordsCallbackWiki( $matches ) {
1595                 list( $wikiUpperChars ) = self::getCaseMaps();
1596                 return strtr( $matches[0], $wikiUpperChars );
1597         }
1598
1599         function ucfirst( $str ) {
1600                 $o = ord( $str );
1601                 if ( $o < 96 ) {
1602                         return $str;
1603                 } elseif ( $o < 128 ) {
1604                         return ucfirst( $str );
1605                 } else {
1606                         // fall back to more complex logic in case of multibyte strings
1607                         return $this->uc( $str, true );
1608                 }
1609         }
1610
1611         function uc( $str, $first = false ) {
1612                 if ( function_exists( 'mb_strtoupper' ) ) {
1613                         if ( $first ) {
1614                                 if ( $this->isMultibyte( $str ) ) {
1615                                         return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
1616                                 } else {
1617                                         return ucfirst( $str );
1618                                 }
1619                         } else {
1620                                 return $this->isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
1621                         }
1622                 } else {
1623                         if ( $this->isMultibyte( $str ) ) {
1624                                 $x = $first ? '^' : '';
1625                                 return preg_replace_callback(
1626                                         "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
1627                                         array( $this, 'ucCallback' ),
1628                                         $str
1629                                 );
1630                         } else {
1631                                 return $first ? ucfirst( $str ) : strtoupper( $str );
1632                         }
1633                 }
1634         }
1635
1636         function lcfirst( $str ) {
1637                 $o = ord( $str );
1638                 if ( !$o ) {
1639                         return strval( $str );
1640                 } elseif ( $o >= 128 ) {
1641                         return $this->lc( $str, true );
1642                 } elseif ( $o > 96 ) {
1643                         return $str;
1644                 } else {
1645                         $str[0] = strtolower( $str[0] );
1646                         return $str;
1647                 }
1648         }
1649
1650         function lc( $str, $first = false ) {
1651                 if ( function_exists( 'mb_strtolower' ) ) {
1652                         if ( $first ) {
1653                                 if ( $this->isMultibyte( $str ) ) {
1654                                         return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
1655                                 } else {
1656                                         return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
1657                                 }
1658                         } else {
1659                                 return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
1660                         }
1661                 } else {
1662                         if ( $this->isMultibyte( $str ) ) {
1663                                 $x = $first ? '^' : '';
1664                                 return preg_replace_callback(
1665                                         "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
1666                                         array( $this, 'lcCallback' ),
1667                                         $str
1668                                 );
1669                         } else {
1670                                 return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
1671                         }
1672                 }
1673         }
1674
1675         function isMultibyte( $str ) {
1676                 return (bool)preg_match( '/[\x80-\xff]/', $str );
1677         }
1678
1679         function ucwords( $str ) {
1680                 if ( $this->isMultibyte( $str ) ) {
1681                         $str = $this->lc( $str );
1682
1683                         // regexp to find first letter in each word (i.e. after each space)
1684                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
1685
1686                         // function to use to capitalize a single char
1687                         if ( function_exists( 'mb_strtoupper' ) ) {
1688                                 return preg_replace_callback(
1689                                         $replaceRegexp,
1690                                         array( $this, 'ucwordsCallbackMB' ),
1691                                         $str
1692                                 );
1693                         } else {
1694                                 return preg_replace_callback(
1695                                         $replaceRegexp,
1696                                         array( $this, 'ucwordsCallbackWiki' ),
1697                                         $str
1698                                 );
1699                         }
1700                 } else {
1701                         return ucwords( strtolower( $str ) );
1702                 }
1703         }
1704
1705         # capitalize words at word breaks
1706         function ucwordbreaks( $str ) {
1707                 if ( $this->isMultibyte( $str ) ) {
1708                         $str = $this->lc( $str );
1709
1710                         // since \b doesn't work for UTF-8, we explicitely define word break chars
1711                         $breaks = "[ \-\(\)\}\{\.,\?!]";
1712
1713                         // find first letter after word break
1714                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
1715
1716                         if ( function_exists( 'mb_strtoupper' ) ) {
1717                                 return preg_replace_callback(
1718                                         $replaceRegexp,
1719                                         array( $this, 'ucwordbreaksCallbackMB' ),
1720                                         $str
1721                                 );
1722                         } else {
1723                                 return preg_replace_callback(
1724                                         $replaceRegexp,
1725                                         array( $this, 'ucwordsCallbackWiki' ),
1726                                         $str
1727                                 );
1728                         }
1729                 } else {
1730                         return preg_replace_callback(
1731                                 '/\b([\w\x80-\xff]+)\b/',
1732                                 array( $this, 'ucwordbreaksCallbackAscii' ),
1733                                 $str
1734                         );
1735                 }
1736         }
1737
1738         /**
1739          * Return a case-folded representation of $s
1740          *
1741          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
1742          * and $s2 are the same except for the case of their characters. It is not
1743          * necessary for the value returned to make sense when displayed.
1744          *
1745          * Do *not* perform any other normalisation in this function. If a caller
1746          * uses this function when it should be using a more general normalisation
1747          * function, then fix the caller.
1748          */
1749         function caseFold( $s ) {
1750                 return $this->uc( $s );
1751         }
1752
1753         function checkTitleEncoding( $s ) {
1754                 if ( is_array( $s ) ) {
1755                         wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
1756                 }
1757                 # Check for non-UTF-8 URLs
1758                 $ishigh = preg_match( '/[\x80-\xff]/', $s );
1759                 if ( !$ishigh ) {
1760                         return $s;
1761                 }
1762
1763                 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1764                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
1765                 if ( $isutf8 ) {
1766                         return $s;
1767                 }
1768
1769                 return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
1770         }
1771
1772         function fallback8bitEncoding() {
1773                 return self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
1774         }
1775
1776         /**
1777          * Most writing systems use whitespace to break up words.
1778          * Some languages such as Chinese don't conventionally do this,
1779          * which requires special handling when breaking up words for
1780          * searching etc.
1781          */
1782         function hasWordBreaks() {
1783                 return true;
1784         }
1785
1786         /**
1787          * Some languages such as Chinese require word segmentation,
1788          * Specify such segmentation when overridden in derived class.
1789          *
1790          * @param $string String
1791          * @return String
1792          */
1793         function segmentByWord( $string ) {
1794                 return $string;
1795         }
1796
1797         /**
              * Some languages have special punctuation that needs to be normalized.
1799          * Make such changes here.
1800          *
1801          * @param $string String
1802          * @return String
1803          */
1804         function normalizeForSearch( $string ) {
1805                 return self::convertDoubleWidth( $string );
1806         }
1807
1808         /**
1809          * convert double-width roman characters to single-width.
1810          * range: ff00-ff5f ~= 0020-007f
1811          */
1812         protected static function convertDoubleWidth( $string ) {
1813                 static $full = null;
1814                 static $half = null;
1815
1816                 if ( $full === null ) {
1817                         $fullWidth = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ";
1818                         $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1819                         $full = str_split( $fullWidth, 3 );
1820                         $half = str_split( $halfWidth );
1821                 }
1822
1823                 $string = str_replace( $full, $half, $string );
1824                 return $string;
1825         }
1826
1827         protected static function insertSpace( $string, $pattern ) {
1828                 $string = preg_replace( $pattern, " $1 ", $string );
1829                 $string = preg_replace( '/ +/', ' ', $string );
1830                 return $string;
1831         }
1832
1833         function convertForSearchResult( $termsArray ) {
1834                 # some languages, e.g. Chinese, need to do a conversion
1835                 # in order for search results to be displayed correctly
1836                 return $termsArray;
1837         }
1838
1839         /**
1840          * Get the first character of a string.
1841          *
1842          * @param $s string
1843          * @return string
1844          */
1845         function firstChar( $s ) {
1846                 $matches = array();
1847                 preg_match(
1848                         '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1849                                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
1850                         $s,
1851                         $matches
1852                 );
1853
1854                 if ( isset( $matches[1] ) ) {
1855                         if ( strlen( $matches[1] ) != 3 ) {
1856                                 return $matches[1];
1857                         }
1858
1859                         // Break down Hangul syllables to grab the first jamo
1860                         $code = utf8ToCodepoint( $matches[1] );
1861                         if ( $code < 0xac00 || 0xd7a4 <= $code ) {
1862                                 return $matches[1];
1863                         } elseif ( $code < 0xb098 ) {
1864                                 return "\xe3\x84\xb1";
1865                         } elseif ( $code < 0xb2e4 ) {
1866                                 return "\xe3\x84\xb4";
1867                         } elseif ( $code < 0xb77c ) {
1868                                 return "\xe3\x84\xb7";
1869                         } elseif ( $code < 0xb9c8 ) {
1870                                 return "\xe3\x84\xb9";
1871                         } elseif ( $code < 0xbc14 ) {
1872                                 return "\xe3\x85\x81";
1873                         } elseif ( $code < 0xc0ac ) {
1874                                 return "\xe3\x85\x82";
1875                         } elseif ( $code < 0xc544 ) {
1876                                 return "\xe3\x85\x85";
1877                         } elseif ( $code < 0xc790 ) {
1878                                 return "\xe3\x85\x87";
1879                         } elseif ( $code < 0xcc28 ) {
1880                                 return "\xe3\x85\x88";
1881                         } elseif ( $code < 0xce74 ) {
1882                                 return "\xe3\x85\x8a";
1883                         } elseif ( $code < 0xd0c0 ) {
1884                                 return "\xe3\x85\x8b";
1885                         } elseif ( $code < 0xd30c ) {
1886                                 return "\xe3\x85\x8c";
1887                         } elseif ( $code < 0xd558 ) {
1888                                 return "\xe3\x85\x8d";
1889                         } else {
1890                                 return "\xe3\x85\x8e";
1891                         }
1892                 } else {
1893                         return '';
1894                 }
1895         }
1896
             /**
              * Startup hook for languages with an alternate character encoding
              * option. The base implementation has an empty body.
              */
             function initEncoding() {
                     # Some languages may have an alternate char encoding option
                     # (Esperanto X-coding, Japanese furigana conversion, etc).
                     # If this language is used as the primary content language,
                     # an override to the defaults can be set here on startup.
             }
1903
1904         function recodeForEdit( $s ) {
1905                 # For some languages we'll want to explicitly specify
1906                 # which characters make it into the edit box raw
1907                 # or are converted in some way or another.
1908                 # Note that if wgOutputEncoding is different from
1909                 # wgInputEncoding, this text will be further converted
1910                 # to wgOutputEncoding.
1911                 global $wgEditEncoding;
1912                 if ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' ) {
1913                         return $s;
1914                 } else {
1915                         return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1916                 }
1917         }
1918
/**
 * Recode submitted text back to UTF-8; the inverse direction of
 * recodeForEdit().
 *
 * @param $s String: text as received
 * @return $s untouched when the effective edit encoding is 'UTF-8',
 *         otherwise whatever $this->iconv() yields for that encoding -> UTF-8
 */
function recodeInput( $s ) {
    global $wgEditEncoding;

    # An empty setting means the edit box was served as UTF-8.
    $enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

    if ( $enc != 'UTF-8' ) {
        return $this->iconv( $enc, 'UTF-8', $s );
    }
    return $s;
}
1933
1934         /**
1935          * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
1936          * also cleans up certain backwards-compatible sequences, converting them
1937          * to the modern Unicode equivalent.
1938          *
1939          * This is language-specific for performance reasons only.
1940          */
function normalize( $s ) {
    global $wgAllUnicodeFixes;

    # UtfNormal::cleanUp() always runs first.
    $normalized = UtfNormal::cleanUp( $s );
    if ( !$wgAllUnicodeFixes ) {
        return $normalized;
    }

    # With all fixes enabled, apply the Arabic and then the Malayalam
    # pair file, in that order.
    foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
        $normalized = $this->transformUsingPairFile( $pairFile, $normalized );
    }
    return $normalized;
}
1951
1952         /**
1953          * Transform a string using serialized data stored in the given file (which
1954          * must be in the serialized subdirectory of $IP). The file contains pairs
1955          * mapping source characters to destination characters.
1956          *
1957          * The data is cached in process memory. This will go faster if you have the
1958          * FastStringSearch extension.
1959          */
function transformUsingPairFile( $file, $string ) {
    # Fast path: the replacer for this file was already built on this object.
    if ( isset( $this->transformData[$file] ) ) {
        return $this->transformData[$file]->replace( $string );
    }

    $pairs = wfGetPrecompiledData( $file );
    if ( $pairs === false ) {
        throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
    }

    # Remember the replacer on the object so the file is loaded only once.
    $this->transformData[$file] = new ReplacementArray( $pairs );
    return $this->transformData[$file]->replace( $string );
}
1970
1971         /**
1972          * For right-to-left language support
1973          *
1974          * @return bool
1975          */
function isRTL() {
    # The 'rtl' item is looked up per language code in the static data cache.
    $rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );
    return $rtl;
}
1979
1980         /**
1981          * Return the correct HTML 'dir' attribute value for this language.
1982          * @return String
1983          */
function getDir() {
    if ( $this->isRTL() ) {
        return 'rtl';
    }
    return 'ltr';
}
1987
1988         /**
1989          * Return 'left' or 'right' as appropriate alignment for line-start
1990          * for this language's text direction.
1991          *
1992          * Should be equivalent to CSS3 'start' text-align value....
1993          *
1994          * @return String
1995          */
function alignStart() {
    # Lines start on the right in right-to-left languages.
    if ( $this->isRTL() ) {
        return 'right';
    }
    return 'left';
}
1999
2000         /**
2001          * Return 'right' or 'left' as appropriate alignment for line-end
2002          * for this language's text direction.
2003          *
2004          * Should be equivalent to CSS3 'end' text-align value....
2005          *
2006          * @return String
2007          */
function alignEnd() {
    # Lines end on the left in right-to-left languages.
    if ( $this->isRTL() ) {
        return 'left';
    }
    return 'right';
}
2011
2012         /**
2013          * A hidden direction mark (LRM or RLM), depending on the language direction
2014          *
2015          * @return string
2016          */
function getDirMark() {
    if ( $this->isRTL() ) {
        return "\xE2\x80\x8F"; // right-to-left mark
    }
    return "\xE2\x80\x8E"; // left-to-right mark
}
2020
/**
 * Fetch the 'capitalizeAllNouns' item for this language code from the
 * static data cache.
 *
 * @return the cached item as-is (presumably a boolean flag; confirm
 *         against the localisation data)
 */
function capitalizeAllNouns() {
    $flag = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
    return $flag;
}
2024
2025         /**
2026          * An arrow, depending on the language direction
2027          *
2028          * @return string
2029          */
function getArrow() {
    # The arrow points in the reading direction of the language.
    if ( $this->isRTL() ) {
        return '←';
    }
    return '→';
}
2033
2034         /**
2035          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2036          *
2037          * @return bool
2038          */
function linkPrefixExtension() {
    # Looked up per language code in the static data cache.
    $enabled = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
    return $enabled;
}
2042
/**
 * Fetch the 'magicWords' item for this language code from the static
 * data cache.
 *
 * @return the cached item as-is; getMagic() indexes it by magic word ID
 */
function getMagicWords() {
    $words = self::$dataCache->getItem( $this->mCode, 'magicWords' );
    return $words;
}
2046
2047         # Fill a MagicWord object with data from here
function getMagic( $mw ) {
    # Run the LanguageGetMagic hook at most once per Language object.
    if ( !$this->mMagicHookDone ) {
        $this->mMagicHookDone = true;
        wfProfileIn( 'LanguageGetMagic' );
        wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
        wfProfileOut( 'LanguageGetMagic' );
    }

    # Entries in mMagicExtensions take precedence over the localisation data.
    $rawEntry = false;
    if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
        $rawEntry = $this->mMagicExtensions[$mw->mId];
    } else {
        $magicWords = $this->getMagicWords();
        if ( isset( $magicWords[$mw->mId] ) ) {
            $rawEntry = $magicWords[$mw->mId];
        }
    }

    if ( is_array( $rawEntry ) ) {
        # Element 0 is the case-sensitivity flag, the rest are the synonyms.
        $mw->mCaseSensitive = $rawEntry[0];
        $mw->mSynonyms = array_slice( $rawEntry, 1 );
    } else {
        # Nothing usable found: log it and leave $mw untouched.
        error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
    }
}
2073
2074         /**
2075          * Add magic words to the extension array
2076          */
function addMagicWordsByLang( $newWords ) {
    # Collect this language and its fallbacks; stop on an empty code or
    # as soon as a code repeats.
    $chain = array();
    for ( $code = $this->getCode();
        $code && !in_array( $code, $chain );
        $code = self::getFallbackFor( $code )
    ) {
        $chain[] = $code;
    }
    # 'en' is always part of the chain.
    if ( !in_array( 'en', $chain ) ) {
        $chain[] = 'en';
    }

    # Merge from the end of the chain towards this language; the array
    # union keeps the left operand's entries, so codes merged later win.
    foreach ( array_reverse( $chain ) as $langCode ) {
        if ( isset( $newWords[$langCode] ) ) {
            $this->mMagicExtensions = $newWords[$langCode] + $this->mMagicExtensions;
        }
    }
}
2094
2095         /**
2096          * Get special page names, as an associative array
2097          *   case folded alias => real name
2098          */
function getSpecialPageAliases() {
    // Aliases may be slow to load, so reuse what was built earlier
    if ( !is_null( $this->mExtendedSpecialPageAliases ) ) {
        return $this->mExtendedSpecialPageAliases;
    }

    // First call: start from the localisation data...
    $this->mExtendedSpecialPageAliases =
        self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
    // ...and let hook handlers modify the array in place
    wfRunHooks( 'LanguageGetSpecialPageAliases',
        array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );

    return $this->mExtendedSpecialPageAliases;
}
2111
2112         /**
2113          * Italic is unsuitable for some languages
2114          *
2115          * @param $text String: the text to be emphasized.
2116          * @return string
2117          */
function emphasize( $text ) {
    # $text is wrapped as given; no escaping happens here.
    return '<em>' . $text . '</em>';
}
2121
/**
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages: some such as Punjabi want ੨੯੩,੨੯੧.੨੩੫ and others such as
 * Icelandic just want to use commas instead of dots, and dots instead
 * of commas like "293.291,235".
 *
 * An example of this function being called:
 * <code>
 * wfMsg( 'message', $wgLang->formatNum( $num ) )
 * </code>
 *
 * See LanguageGu.php for the Gujarati implementation and
 * $separatorTransformTable in MessagesIs.php for
 * the , => . and . => , implementation.
 *
 * @todo check if it's viable to use localeconv() for the decimal
 *       separator thing.
 * @param $number Mixed: the number to be formatted, should be an integer
 *        or a floating point number.
 * @param $nocommafy Bool: set to true for special numbers like dates
 * @return string
 */
function formatNum( $number, $nocommafy = false ) {
    global $wgTranslateNumerals;

    if ( !$nocommafy ) {
        # Group the digits, then swap in this language's separators (if any).
        $number = $this->commafy( $number );
        $separators = $this->separatorTransformTable();
        if ( $separators ) {
            $number = strtr( $number, $separators );
        }
    }

    if ( !$wgTranslateNumerals ) {
        return $number;
    }

    # Digits are translated only when $wgTranslateNumerals is on and the
    # language actually has a digit table.
    $digits = $this->digitTransformTable();
    return $digits ? strtr( $number, $digits ) : $number;
}
2165
/**
 * Undo formatNum(): map localized digits and separators back through the
 * flipped transform tables, then strip the grouping commas.
 *
 * @param $number String: localized number
 * @return string
 */
function parseFormattedNumber( $number ) {
    $digits = $this->digitTransformTable();
    if ( $digits ) {
        $number = strtr( $number, array_flip( $digits ) );
    }

    $separators = $this->separatorTransformTable();
    if ( $separators ) {
        $number = strtr( $number, array_flip( $separators ) );
    }

    # Whatever commas remain are removed.
    $withoutCommas = strtr( $number, array( ',' => '' ) );
    return $withoutCommas;
}
2180
2181         /**
2182          * Adds commas to a given number
2183          *
2184          * @param $_ mixed
2185          * @return string
2186          */
function commafy( $_ ) {
    # Work on the reversed string so that groups of three digits are
    # counted from the right.
    $reversed = strrev( $_ );
    # Put a comma after each group of three digits that is followed by
    # another digit, unless a '.' still lies ahead in the reversed string
    # (i.e. the group belongs to the fractional part).
    $grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
    return strrev( (string)$grouped );
}
2190
/**
 * Fetch the 'digitTransformTable' item for this language code from the
 * static data cache.
 *
 * @return the cached item as-is; formatNum() hands it to strtr() when it
 *         is truthy
 */
function digitTransformTable() {
    $table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
    return $table;
}
2194
/**
 * Fetch the 'separatorTransformTable' item for this language code from
 * the static data cache.
 *
 * @return the cached item as-is; formatNum() hands it to strtr() when it
 *         is truthy
 */
function separatorTransformTable() {
    $table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
    return $table;
}
2198
2199         /**
2200          * Take a list of strings and build a locale-friendly comma-separated
2201          * list, using the local comma-separator message.
2202          * The last two strings are chained with an "and".
2203          *
2204          * @param $l Array
2205          * @return string
2206          */
function listToText( $l ) {
    # Re-index so that lists with gaps or string keys work too; the
    # element order is kept.
    $items = array_values( $l );
    $last = count( $items ) - 1;

    if ( $last < 0 ) {
        return '';
    }
    if ( $last == 0 ) {
        return $items[0];
    }

    # Fetch each message once instead of once per element.
    $and = $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' );
    # The final two elements are always joined with "and".
    $text = $items[$last - 1] . $and . $items[$last];

    if ( $last > 1 ) {
        # Everything before them is comma-separated.
        $comma = $this->getMessageFromDB( 'comma-separator' );
        $text = implode( $comma, array_slice( $items, 0, $last - 1 ) ) . $comma . $text;
    }
    return $text;
}
2225
2226         /**
2227          * Take a list of strings and build a locale-friendly comma-separated
2228          * list, using the local comma-separator message.
2229          * @param $list array of strings to put in a comma list
2230          * @return string
2231          */
function commaList( $list ) {
    $separator = wfMsgExt(
        'comma-separator',
        array( 'parsemag', 'escapenoentities', 'language' => $this )
    );
    # Glue goes first: the reversed ( array, glue ) order was deprecated
    # in PHP 7.4 and removed in PHP 8.0.
    return implode( $separator, $list );
}
2241
2242         /**
2243          * Take a list of strings and build a locale-friendly semicolon-separated
2244          * list, using the local semicolon-separator message.
2245          * @param $list array of strings to put in a semicolon list
2246          * @return string
2247          */
function semicolonList( $list ) {
    $separator = wfMsgExt(
        'semicolon-separator',
        array( 'parsemag', 'escapenoentities', 'language' => $this )
    );
    # Glue goes first: the reversed ( array, glue ) order was deprecated
    # in PHP 7.4 and removed in PHP 8.0.
    return implode( $separator, $list );
}
2257
2258         /**
2259          * Same as commaList, but separate it with the pipe instead.
2260          * @param $list array of strings to put in a pipe list
2261          * @return string
2262          */
function pipeList( $list ) {
    $separator = wfMsgExt(
        'pipe-separator',
        array( 'escapenoentities', 'language' => $this )
    );
    # Glue goes first: the reversed ( array, glue ) order was deprecated
    # in PHP 7.4 and removed in PHP 8.0.
    return implode( $separator, $list );
}
2272
2273         /**
2274          * Truncate a string to a specified length in bytes, appending an optional
2275          * string (e.g. for ellipses)
2276          *
2277          * The database offers limited byte lengths for some columns in the database;
2278          * multi-byte character sets mean we need to ensure that only whole characters
2279          * are included, otherwise broken characters can be passed to the user
2280          *
2281          * If $length is negative, the string will be truncated from the beginning
2282          *
2283          * @param $string String to truncate
2284          * @param $length Int: maximum length (excluding ellipses)
2285          * @param $ellipsis String to append to the truncated text
2286          * @return string
2287          */
function truncate( $string, $length, $ellipsis = '...' ) {
    # The default ellipsis is swapped for the localized one
    if ( $ellipsis == '...' ) {
        $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
    }

    # Nothing of the string may be shown at all
    if ( $length == 0 ) {
        return $ellipsis;
    }
    # The string already fits
    if ( strlen( $string ) <= abs( $length ) ) {
        return $string;
    }

    if ( $length > 0 ) {
        # Keep the head: xyz...
        $head = $this->removeBadCharLast( substr( $string, 0, $length ) );
        $truncated = $head . $ellipsis;
    } else {
        # Keep the tail: ...xyz
        $tail = $this->removeBadCharFirst( substr( $string, $length ) );
        $truncated = $ellipsis . $tail;
    }

    # Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
    return ( strlen( $truncated ) < strlen( $string ) ) ? $truncated : $string;
}
2316
2317         /**
2318          * Remove bytes that represent an incomplete Unicode character
2319          * at the end of string (e.g. bytes of the char are missing)
2320          *
2321          * @param $string String
2322          * @return string
2323          */
protected function removeBadCharLast( $string ) {
    # An empty string has no last byte to inspect.
    if ( $string == '' ) {
        return $string;
    }

    $char = ord( $string[strlen( $string ) - 1] );
    $m = array();
    if ( $char >= 0xc0 ) {
        # We got the first byte only of a multibyte char; remove it.
        $string = substr( $string, 0, -1 );
    } elseif ( $char >= 0x80 &&
        # The 's' modifier lets '.' match newlines as well; without it the
        # pattern can never match a string containing "\n" and the broken
        # bytes would be left in place.
        preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
            '[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
    {
        # We chopped in the middle of a 3- or 4-byte character; drop the
        # lead byte and the continuation bytes that are present.
        $string = $m[1];
    }
    return $string;
}
2339
2340         /**
2341          * Remove bytes that represent an incomplete Unicode character
2342          * at the start of string (e.g. bytes of the char are missing)
2343          *
2344          * @param $string String
2345          * @return string
2346          */
protected function removeBadCharFirst( $string ) {
    # An empty string has no first byte to inspect.
    if ( $string == '' ) {
        return $string;
    }

    $char = ord( $string[0] );
    if ( $char >= 0x80 && $char < 0xc0 ) {
        # The string starts with continuation bytes, i.e. we chopped in the
        # middle of a character; remove the whole leading run.
        $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
    }
    return $string;
}
2355
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML)
 *
 * Note: tries to fix broken HTML with MWTidy
 *
 * @param $text String to truncate
 * @param $length Int: (zero/positive) maximum length (excluding ellipses)
 * @param $ellipsis String to append to the truncated text
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
    # Use the localized ellipsis character
    if ( $ellipsis == '...' ) {
        $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
    }
    # Check if there is no need to truncate
    if ( $length <= 0 ) {
        return $ellipsis; // no text shown, nothing to format
    } elseif ( strlen( $text ) <= $length ) {
        return $text; // string short enough even *with* HTML
    }
    $text = MWTidy::tidy( $text ); // fix tags
    $displayLen = 0; // innerHTML length so far
    $testingEllipsis = false; // checking if ellipses will make string longer/equal?
    $tagType = 0; // 0-open, 1-close
    $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
    $entityState = 0; // 0-not entity, 1-entity
    $tag = $ret = $ch = '';
    $openTags = array();
    $textLen = strlen( $text );
    for ( $pos = 0; $pos < $textLen; ++$pos ) {
        $ch = $text[$pos];
        $lastCh = $pos ? $text[$pos - 1] : '';
        $ret .= $ch; // add to result string
        if ( $ch == '<' ) {
            $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
            $entityState = 0; // for bad HTML
            $bracketState = 1; // tag started (checking for slash)
        } elseif ( $ch == '>' ) {
            $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
            $entityState = 0; // for bad HTML
            $bracketState = 0; // out of brackets
        } elseif ( $bracketState == 1 ) {
            if ( $ch == '/' ) {
                $tagType = 1; // close tag (e.g. "</span>")
            } else {
                $tagType = 0; // open tag (e.g. "<span>")
                $tag .= $ch;
            }
            $bracketState = 2; // building tag name
        } elseif ( $bracketState == 2 ) {
            if ( $ch != ' ' ) {
                $tag .= $ch;
            } else {
                // Name found (e.g. "<a href=..."), add on tag attributes...
                $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
            }
        } elseif ( $bracketState == 0 ) {
            if ( $entityState ) {
                if ( $ch == ';' ) {
                    $entityState = 0;
                    $displayLen++; // entity is one displayed char
                }
            } else {
                if ( $ch == '&' ) {
                    $entityState = 1; // entity found, (e.g. "&#160;")
                } else {
                    $displayLen++; // this char is displayed
                    // Add on the other display text after this...
                    $skipped = $this->truncate_skip(
                        $ret, $text, "<>&", $pos + 1, $length - $displayLen );
                    $displayLen += $skipped;
                    $pos += $skipped;
                }
            }
        }
        # Consider truncation once the display length has reached the maximum.
        # Double-check that we're not in the middle of a bracket/entity...
        if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
            if ( !$testingEllipsis ) {
                $testingEllipsis = true;
                # Save where we are; we will truncate here unless
                # the ellipsis actually makes the string longer.
                $pOpenTags = $openTags; // save state
                $pRet = $ret; // save state
            } elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) {
                # Ellipsis won't make string longer/equal, the truncation point was OK.
                $openTags = $pOpenTags; // reload state
                $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
                $ret .= $ellipsis; // add ellipsis
                break;
            }
        }
    }
    if ( $displayLen == 0 ) {
        return ''; // no text shown, nothing to format
    }
    # For bad HTML: account for a tag that was still being read when the
    # text ended. The arguments follow the helper's signature
    # ( &$tag, $tagType, $lastCh, &$openTags ); the tag type and the last
    # character used to be passed in each other's place here.
    $this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
    while ( count( $openTags ) > 0 ) {
        $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
    }
    return $ret;
}
2463
2464         // truncateHtml() helper function
2465         // like strcspn() but adds the skipped chars to $ret
private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
    $skipCount = 0;
    if ( $start < strlen( $text ) ) {
        if ( $len < 0 ) {
            # A negative $len (including the default) means "no limit".
            # It must not be handed to strcspn(), which reads a negative
            # length as "stop that many bytes before the end of the
            # string" and would therefore never consume the final byte.
            $skipCount = strcspn( $text, $search, $start );
        } else {
            $skipCount = strcspn( $text, $search, $start, $len );
        }
        # Copy the run of bytes free of any $search character onto $ret
        $ret .= substr( $text, $start, $skipCount );
    }
    return $skipCount;
}
2474
2475         // truncateHtml() helper function
2476         // (a) push or pop $tag from $openTags as needed
2477         // (b) clear $tag value
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
    $tag = ltrim( $tag );
    if ( $tag == '' ) {
        return; // no tag name collected, nothing to record
    }

    if ( $tagType == 0 && $lastCh != '/' ) {
        // Open tag that did not close itself: remember it
        $openTags[] = $tag;
    } elseif ( $tagType == 1 ) {
        // Close tag: pop only if it matches the innermost open tag
        $innermost = count( $openTags ) - 1;
        if ( $openTags && $tag == $openTags[$innermost] ) {
            array_pop( $openTags );
        }
    }
    // Either way the collected name is consumed
    $tag = '';
}
2491
2492         /**
2493          * Grammatical transformations, needed for inflected languages
2494          * Invoked by putting {{grammar:case|word}} in a message
2495          *
2496          * @param $word string
2497          * @param $case string
2498          * @return string
2499          */
function convertGrammar( $word, $case ) {
    global $wgGrammarForms;

    # Only forms configured in $wgGrammarForms for this language code,
    # case and word are applied; anything else passes through unchanged.
    $code = $this->getCode();
    if ( !isset( $wgGrammarForms[$code][$case][$word] ) ) {
        return $word;
    }
    return $wgGrammarForms[$code][$case][$word];
}
2507
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|neutral}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 * If second or third parameter are not specified, masculine is used.
 * These details may be overridden per language.
 */
function gender( $gender, $forms ) {
    if ( count( $forms ) == 0 ) {
        return '';
    }
    # Guarantee at least two forms so that index 1 always exists.
    $forms = $this->preConvertPlural( $forms, 2 );

    if ( $gender === 'male' ) {
        return $forms[0];
    } elseif ( $gender === 'female' ) {
        return $forms[1];
    } elseif ( isset( $forms[2] ) ) {
        # Any other value takes the third form when there is one...
        return $forms[2];
    }
    # ...and the first form otherwise.
    return $forms[0];
}
2529
2530         /**
2531          * Plural form transformations, needed for some languages.
2532          * For example, there are 3 form of plural in Russian and Polish,
2533          * depending on "count mod 10". See [[w:Plural]]
2534          * For English it is pretty simple.
2535          *
2536          * Invoked by putting {{plural:count|wordform1|wordform2}}
2537          * or {{plural:count|wordform1|wordform2|wordform3}}
2538          *
2539          * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
2540          *
2541          * @param $count Integer: non-localized number
2542          * @param $forms Array: different plural forms
2543          * @return string Correct form of plural for $count in this language
2544          */
function convertPlural( $count, $forms ) {
    if ( count( $forms ) == 0 ) {
        return '';
    }
    # Guarantee at least two forms so that index 1 always exists.
    $forms = $this->preConvertPlural( $forms, 2 );

    # Exactly one takes the first form, everything else the second.
    if ( $count == 1 ) {
        return $forms[0];
    }
    return $forms[1];
}
2553
/**
 * Pads the array of forms given to convertPlural to the requested
 * amount of forms by copying the last one. The argument is not checked;
 * callers are expected to pass a non-empty array.
 *
 * @param $forms Array of forms given to convertPlural
 * @param $count Integer: How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
    # Append copies of the current last form until $count forms exist.
    for ( $have = count( $forms ); $have < $count; $have++ ) {
        $forms[] = $forms[$have - 1];
    }
    return $forms;
}
2568
2569         /**
2570          * For translating of expiry times
2571          * @param $str String: the validated block time in English
2572          * @return Somehow translated block time
2573          * @see LanguageFi.php for example implementation
2574          */
function translateBlockExpiry( $str ) {
    $options = $this->getMessageFromDB( 'ipboptions' );

    # A message of '-' means there is nothing to translate with.
    if ( $options != '-' ) {
        # The message is a comma-separated list of "display:value" pairs.
        foreach ( explode( ',', $options ) as $option ) {
            if ( strpos( $option, ':' ) !== false ) {
                list( $show, $value ) = explode( ':', $option );
                if ( strcmp( $str, $value ) == 0 ) {
                    # Hand back the escaped display text of the first match.
                    return htmlspecialchars( trim( $show ) );
                }
            }
        }
    }

    # No match: the input is returned as given.
    return $str;
}
2594
/**
 * Segment text before diffing. Languages like Chinese need to be
 * segmented in order for the diff to be of any use.
 *
 * This base implementation hands the text back untouched.
 *
 * @param $text String
 * @return String
 */
public function segmentForDiff( $text ) {
    return $text;
}
2605
/**
 * Undo segmentForDiff() so the diff result can be shown.
 *
 * This base implementation hands the text back untouched.
 *
 * @param $text String
 * @return String
 */
public function unsegmentForDiff( $text ) {
    return $text;
}
2615
/**
 * Convert text to all supported variants.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's autoConvertToAllVariants() returns
 */
public function autoConvertToAllVariants( $text ) {
    $converter = $this->mConverter;
    return $converter->autoConvertToAllVariants( $text );
}
2620
/**
 * Convert text to different variants of a language.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's convert() returns
 */
public function convert( $text ) {
    $converter = $this->mConverter;
    return $converter->convert( $text );
}
2625
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $title Title object to convert
 * @return whatever the converter's convertTitle() returns
 */
public function convertTitle( $title ) {
    $converter = $this->mConverter;
    return $converter->convertTitle( $title );
}
2630
/**
 * Check if this is a language with variants, i.e. getVariants()
 * reports more than one entry.
 *
 * @return Boolean
 */
public function hasVariants() {
    $variants = $this->getVariants();
    return sizeof( $variants ) > 1;
}
2635
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's armourMath() returns
 */
public function armourMath( $text ) {
    $converter = $this->mConverter;
    return $converter->armourMath( $text );
}
2640
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param $text String text to be converted
 * @param $isTitle Bool whether this conversion is for the article title.
 *   It is forwarded to convert(); NOTE(review): convert() as declared in
 *   this class only accepts $text, so the flag appears to have no effect
 *   unless convert() is overridden - confirm.
 * @return string
 * @todo this should get integrated somewhere sane
 */
public function convertHtml( $text, $isTitle = false ) {
    $converted = $this->convert( $text, $isTitle );
    return htmlspecialchars( $converted );
}
2651
/**
 * Convert a category key.
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $key category key to convert
 * @return whatever the converter's convertCategoryKey() returns
 */
public function convertCategoryKey( $key ) {
    $converter = $this->mConverter;
    return $converter->convertCategoryKey( $key );
}
2655
/**
 * Get the list of variants supported by this language, as reported by
 * the converter object. See sample implementation in LanguageZh.php
 *
 * @return array an array of language codes
 */
public function getVariants() {
    $converter = $this->mConverter;
    return $converter->getVariants();
}
2665
/**
 * Get the preferred variant from the converter object.
 * Both flags are forwarded to the converter unchanged.
 *
 * @param $fromUser Boolean: passed through to the converter
 * @param $fromHeader Boolean: passed through to the converter
 * @return whatever the converter's getPreferredVariant() returns
 */
public function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
    $converter = $this->mConverter;
    return $converter->getPreferredVariant( $fromUser, $fromHeader );
}
2669
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it. See e.g. LanguageZh.php
 *
 * $link and $nt are taken by reference and handed to the converter,
 * which may modify them.
 *
 * @param $link String: the name of the link
 * @param $nt Mixed: the title object of the link
 * @param $ignoreOtherCond Boolean: to disable other conditions when
 *      we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
    $converter = $this->mConverter;
    $converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
2685
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 * Delegates to the converter object held in $this->mConverter.
 *
 * @param $text String
 * @return whatever the converter's convertLinkToAllVariants() returns
 */
public function convertLinkToAllVariants( $text ) {
    $converter = $this->mConverter;
    return $converter->convertLinkToAllVariants( $text );
}
2694
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value is obtained
 * from the converter object.
 *
 * @return string
 */
public function getExtraHashOptions() {
    $converter = $this->mConverter;
    return $converter->getExtraHashOptions();
}
2704
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object.
 *
 * @return string
 */
public function getParsedTitle() {
    $converter = $this->mConverter;
    return $converter->getParsedTitle();
}
2715
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. The work is done by the converter
 * object.
 *
 * @param $text String: text to be tagged for no conversion
 * @param $noParse passed through to the converter unchanged
 * @return string the tagged text
 */
public function markNoConversion( $text, $noParse = false ) {
    $converter = $this->mConverter;
    return $converter->markNoConversion( $text, $noParse );
}
2727
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * Fetched as the 'linkTrail' item for this language's code from the
 * class-wide data cache.
 *
 * @return string
 */
public function linkTrail() {
    $cache = self::$dataCache;
    return $cache->getItem( $this->mCode, 'linkTrail' );
}
2737
/**
 * Get the language object, which is this object itself.
 *
 * @return Language $this
 */
public function getLangObj() {
    return $this;
}
2741
/**
 * Get the RFC 3066 code for this language object
 *
 * @return the value currently stored in $this->mCode
 */
public function getCode() {
    return $this->mCode;
}
2748
/**
 * Set the language code of this object (stored in $this->mCode and
 * returned by getCode()).
 *
 * @param $code language code to store
 */
public function setCode( $code ) {
    $this->mCode = $code;
}
2752
/**
 * Get the name of a file for a certain language code.
 *
 * The code is mangled by upper-casing its first character and turning
 * every '-' into '_'.
 *
 * @param $prefix string Prepend this to the filename. Because $code is
 *   required and follows it, callers always have to pass this explicitly.
 * @param $code string Language code
 * @param $suffix string Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 */
public static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
    $mangledCode = str_replace( '-', '_', ucfirst( $code ) );
    return $prefix . $mangledCode . $suffix;
}
2763
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * The code part must be one upper-case ASCII letter followed by one or
 * more lower-case ASCII letters or underscores. The pattern is not
 * anchored, so the match may occur anywhere inside $filename.
 *
 * @param $filename string $prefix . $languageCode . $suffix
 * @param $prefix string Prefix before the language code
 * @param $suffix string Suffix after the language code
 * @return Language code (lower-cased, '_' turned into '-'), or false if
 *   $prefix or $suffix isn't found
 */
public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
    $pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
        preg_quote( $suffix, '/' ) . '/';
    $matches = null;
    preg_match( $pattern, $filename, $matches );
    if ( count( $matches ) ) {
        return str_replace( '_', '-', strtolower( $matches[1] ) );
    }
    return false;
}
2780
/**
 * Get the path of the messages file for a language code, built with
 * getFileName() under "$IP/languages/messages/".
 *
 * @param $code string Language code
 * @return string
 */
public static function getMessagesFileName( $code ) {
    global $IP;
    $prefix = $IP . '/languages/messages/Messages';
    return self::getFileName( $prefix, $code, '.php' );
}
2785
/**
 * Get the path of the language class file for a language code, built
 * with getFileName() under "$IP/languages/classes/".
 *
 * @param $code string Language code
 * @return string
 */
public static function getClassFileName( $code ) {
    global $IP;
    $prefix = $IP . '/languages/classes/Language';
    return self::getFileName( $prefix, $code, '.php' );
}
2790
/**
 * Get the fallback for a given language
 *
 * @param $code string Language code
 * @return false for 'en'; otherwise the 'fallback' item of the
 *   localisation cache for $code
 */
public static function getFallbackFor( $code ) {
    // Shortcut: 'en' is answered without consulting the cache
    if ( $code === 'en' ) {
        return false;
    }
    return self::getLocalisationCache()->getItem( $code, 'fallback' );
}
2802
/**
 * Get all messages for a given language, i.e. the 'messages' item of
 * the localisation cache for $code.
 * WARNING: this may take a long time
 *
 * @param $code string Language code
 */
public static function getMessagesFor( $code ) {
    $cache = self::getLocalisationCache();
    return $cache->getItem( $code, 'messages' );
}
2810
/**
 * Get a message for a given language, i.e. the $key sub-item of the
 * 'messages' item in the localisation cache for $code.
 *
 * @param $key message key
 * @param $code string Language code
 */
public static function getMessageFor( $key, $code ) {
    $cache = self::getLocalisationCache();
    return $cache->getSubitem( $code, 'messages', $key );
}
2817
/**
 * Expand the '$1' placeholder in a namespace name.
 *
 * When $talk contains '$1', it is replaced by $wgMetaNamespace, any
 * {{grammar:case|word}} constructs are expanded through
 * replaceGrammarInNamespace(), and spaces are turned into underscores.
 *
 * @param $talk String: namespace name, possibly containing '$1'
 * @return String: $talk unchanged when it has no '$1', otherwise the
 *   expanded name
 */
public function fixVariableInNamespace( $talk ) {
    if ( strpos( $talk, '$1' ) === false ) {
        return $talk;
    }

    global $wgMetaNamespace;
    $talk = str_replace( '$1', $wgMetaNamespace, $talk );

    # Allow grammar transformations
    # Allowing full message-style parsing would make simple requests
    # such as action=raw much more expensive than they need to be.
    # This will hopefully cover most cases.
    # The callback takes $this by value: objects are handles, so a
    # reference is unnecessary, and PHP 7.1+ rejects &$this.
    $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
        array( $this, 'replaceGrammarInNamespace' ), $talk );
    return str_replace( ' ', '_', $talk );
}
2834
/**
 * preg_replace_callback() handler used by fixVariableInNamespace() for
 * {{grammar:case|word}} constructs.
 *
 * @param $m Array: regex match; $m[1] is the text before the pipe and
 *   $m[2] the text after it
 * @return result of convertGrammar() called with the trimmed $m[2] as
 *   first argument and the trimmed $m[1] as second
 */
public function replaceGrammarInNamespace( $m ) {
    $word = trim( $m[2] );
    $case = trim( $m[1] );
    return $this->convertGrammar( $word, $case );
}
2838
/**
 * Get the upper- and lower-case character maps, loaded from the
 * precompiled 'Utf8Case.ser' data on first use and kept in static
 * variables for later calls.
 *
 * @return array( $wikiUpperChars, $wikiLowerChars )
 * @throws MWException when the precompiled data cannot be loaded
 */
public static function getCaseMaps() {
    static $wikiUpperChars, $wikiLowerChars;
    if ( isset( $wikiUpperChars ) ) {
        return array( $wikiUpperChars, $wikiLowerChars );
    }

    wfProfileIn( __METHOD__ );
    $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
    if ( $arr === false ) {
        // Close the profiling section opened above before bailing out
        wfProfileOut( __METHOD__ );
        throw new MWException(
            "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
    }
    // Read only the two expected keys rather than extract()ing the whole
    // array, which would let the data file define arbitrary local variables.
    $wikiUpperChars = $arr['wikiUpperChars'];
    $wikiLowerChars = $arr['wikiLowerChars'];
    wfProfileOut( __METHOD__ );
    return array( $wikiUpperChars, $wikiLowerChars );
}
2855
/**
 * Format a time period given in seconds as a short localised string.
 *
 * Below ten seconds one decimal place is shown; below a minute whole
 * seconds; below an hour minutes and seconds; otherwise hours, minutes
 * and seconds. The unit suffixes come from the 'seconds-abbrev',
 * 'minutes-abbrev' and 'hours-abbrev' messages.
 *
 * The range checks are made on the rounded value, so an input that
 * rounds up to a boundary (e.g. 59.6 or 3599.6) is shown in the next
 * larger unit instead of as "60" seconds or minutes.
 *
 * @param $seconds Number of seconds, may be fractional
 * @return String
 */
public function formatTimePeriod( $seconds ) {
    $tenths = round( $seconds * 10 );
    $wholeSeconds = round( $seconds );

    if ( $tenths < 100 ) {
        return $this->formatNum( sprintf( "%.1f", $tenths / 10 ) ) .
            $this->getMessageFromDB( 'seconds-abbrev' );
    } elseif ( $wholeSeconds < 60 ) {
        return $this->formatNum( $wholeSeconds ) . $this->getMessageFromDB( 'seconds-abbrev' );
    } elseif ( $wholeSeconds < 3600 ) {
        $minutes = floor( $seconds / 60 );
        $secondsPart = round( fmod( $seconds, 60 ) );
        // Rounding may produce a full minute; carry it over
        if ( $secondsPart == 60 ) {
            $secondsPart = 0;
            $minutes++;
        }
        return $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
            $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
    } else {
        $hours = floor( $seconds / 3600 );
        $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
        $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
        // Rounding may produce a full minute, which may in turn complete
        // an hour; carry both over
        if ( $secondsPart == 60 ) {
            $secondsPart = 0;
            $minutes++;
        }
        if ( $minutes == 60 ) {
            $minutes = 0;
            $hours++;
        }
        return $this->formatNum( $hours ) . $this->getMessageFromDB( 'hours-abbrev' ) . ' ' .
            $this->formatNum( $minutes ) . $this->getMessageFromDB( 'minutes-abbrev' ) . ' ' .
            $this->formatNum( $secondsPart ) . $this->getMessageFromDB( 'seconds-abbrev' );
    }
}
2887
/**
 * Format a bitrate for output, using bps, kbps, Mbps or Gbps according
 * to the magnitude in question.
 *
 * Mantissas below 10 keep one decimal place, larger ones are rounded to
 * whole numbers. Rates below 1 bps are shown in bps and rates beyond the
 * Gbps range stay in Gbps, so the unit lookup never leaves the table.
 *
 * @param $bps Bitrate in bits per second
 * @return String: formatted number directly followed by the unit
 */
public function formatBitrate( $bps ) {
    $units = array( 'bps', 'kbps', 'Mbps', 'Gbps' );
    if ( $bps <= 0 ) {
        return $this->formatNum( $bps ) . $units[0];
    }
    $unitIndex = (int)floor( log10( $bps ) / 3 );
    // Clamp to the available units: fractional rates give a negative
    // index, rates of 10^12 and above an index past the end.
    if ( $unitIndex < 0 ) {
        $unitIndex = 0;
    }
    while ( !isset( $units[$unitIndex] ) ) {
        $unitIndex--;
    }
    $mantissa = $bps / pow( 1000, $unitIndex );
    if ( $mantissa < 10 ) {
        $mantissa = round( $mantissa, 1 );
    } else {
        $mantissa = round( $mantissa );
    }
    // Rounding can reach 1000 (e.g. 999.6); move on to the next unit
    if ( $mantissa >= 1000 && isset( $units[$unitIndex + 1] ) ) {
        $mantissa = round( $mantissa / 1000, 1 );
        $unitIndex++;
    }
    return $this->formatNum( $mantissa ) . $units[$unitIndex];
}
2902
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB or GB) according to the magnitude in question.
 *
 * A value that reaches 1024 of a unit is expressed in the next larger
 * one, so exactly 1024 bytes is reported as 1 KB. Sizes beyond the GB
 * range stay in GB. The text comes from the 'size-bytes',
 * 'size-kilobytes', 'size-megabytes' and 'size-gigabytes' messages,
 * whose '$1' is replaced by the formatted number.
 *
 * @param $size Size to format
 * @return string Plain text (not HTML)
 */
public function formatSize( $size ) {
    // For small sizes no decimal places necessary
    $msg = 'size-bytes';
    $round = 0;
    // Larger units in ascending order => decimal places to keep;
    // for MB and bigger two decimal places are smarter
    $largerUnits = array(
        'size-kilobytes' => 0,
        'size-megabytes' => 2,
        'size-gigabytes' => 2,
    );
    foreach ( $largerUnits as $unitMsg => $decimals ) {
        if ( $size < 1024 ) {
            break;
        }
        $size = $size / 1024;
        $msg = $unitMsg;
        $round = $decimals;
    }
    $size = round( $size, $round );
    $text = $this->getMessageFromDB( $msg );
    return str_replace( '$1', $this->formatNum( $size ), $text );
}
2935
/**
 * Get the conversion rule title, if any, as reported by the converter
 * object.
 *
 * @return whatever the converter's getConvRuleTitle() returns
 */
public function getConvRuleTitle() {
    $converter = $this->mConverter;
    return $converter->getConvRuleTitle();
}
2942
/**
 * Given a string, convert it to a (hopefully short) key that can be used
 * for efficient sorting.  A binary sort according to the sortkeys
 * corresponds to a logical sort of the corresponding strings.  Current
 * code expects that a null character should sort before all others, but
 * has no other particular expectations (and that one can be changed if
 * necessary).
 *
 * For now this is a placeholder that simply runs strtoupper() over the
 * input.
 *
 * @param string $string UTF-8 string
 * @return string Binary sortkey
 */
public function convertToSortkey( $string ) {
    # Fake function for now
    $sortkey = strtoupper( $string );
    return $sortkey;
}
2958
/**
 * Does it make sense for lists to be split up into sections based on
 * their first letter?  Logogram-based scripts probably want to return
 * false; this base implementation always answers true.
 *
 * TODO: Use this in CategoryPage.php.
 *
 * @return boolean
 */
public function usesFirstLettersInLists() {
    return true;
}
2970
/**
 * Given a string, return the logical "first letter" to be used for
 * grouping on category pages and so on.  This has to be coordinated
 * carefully with convertToSortkey(), or else the sorted list might jump
 * back and forth between the same "initial letters" or other pathological
 * behavior.  For instance, if you just return the first character, but "a"
 * sorts the same as "A" based on convertToSortkey(), then you might get a
 * list like
 *
 * == A ==
 * * [[Aardvark]]
 *
 * == a ==
 * * [[antelope]]
 *
 * == A ==
 * * [[Ape]]
 *
 * etc., assuming for the sake of argument that $wgCapitalLinks is false.
 * Obviously, this is ignored if usesFirstLettersInLists() is false.
 *
 * A single leading null character is skipped before the first character
 * is taken with firstChar() and upper-cased with strtoupper().
 *
 * @param string $string UTF-8 string
 * @return string UTF-8 string corresponding to the first letter of input
 */
public function firstLetterForLists( $string ) {
    // isset() keeps an empty string from triggering an
    // uninitialized-offset notice on $string[0]
    if ( isset( $string[0] ) && $string[0] == "\0" ) {
        $string = substr( $string, 1 );
    }
    return strtoupper( $this->firstChar( $string ) );
}
3001 }