6 if( !defined( 'MEDIAWIKI' ) ) {
7 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
12 # In general you should not make customizations in these language files
13 # directly, but should use the MediaWiki: special namespace to customize
14 # user interface messages through the wiki.
15 # See http://meta.wikipedia.org/wiki/MediaWiki_namespace
17 # NOTE TO TRANSLATORS: Do not copy this whole file when making translations!
18 # A lot of common constants and a base class with inheritable methods are
19 # defined here, which should not be redefined. See the other LanguageXx.php
24 global $wgLanguageNames;
25 require_once( 'Names.php' );
27 global $wgInputEncoding, $wgOutputEncoding;
30 * These are always UTF-8, they exist only for backwards compatibility
32 $wgInputEncoding = "UTF-8";
33 $wgOutputEncoding = "UTF-8";
35 if( function_exists( 'mb_strtoupper' ) ) {
36 mb_internal_encoding('UTF-8');
39 /* a fake language converter */
/**
 * Build the no-op converter around the language object it serves.
 *
 * Declared as __construct(), matching the __construct() used further down
 * this file: a method that is merely named after its class is deprecated
 * as a constructor in PHP 7 and is no longer invoked by `new` in PHP 8,
 * which would leave $this->mLang unset for getVariants() and friends.
 *
 * @param object $langobj Language object; its getCode() is called by the
 *                        other methods of this class
 */
function __construct($langobj) {
	$this->mLang = $langobj;
}

/**
 * Legacy PHP 4-style entry point, kept as a plain method so that explicit
 * calls (e.g. parent::FakeConverter( $lang )) keep working.
 *
 * @param object $langobj see __construct()
 */
function FakeConverter($langobj) {
	$this->__construct($langobj);
}
/**
 * No-op conversion: hands the text back untouched.
 *
 * @param string $t text to "convert"
 * @param mixed  $i ignored here (callers in this file pass their $isTitle flag)
 * @return string $t, unchanged
 */
function convert($t, $i) {
	return $t;
}
/**
 * No-op parser-side conversion: hands the text back untouched.
 *
 * @param string $t text to "convert"
 * @param mixed  $p ignored here (callers in this file pass the parser)
 * @return string $t, unchanged
 */
function parserConvert($t, $p) {
	return $t;
}
/**
 * List the variants of the wrapped language: only its own code.
 *
 * @return array single-element array holding $this->mLang->getCode()
 */
function getVariants() {
	$ownCode = $this->mLang->getCode();
	return array( $ownCode );
}
/**
 * The preferred variant is simply the wrapped language's own code.
 *
 * @return mixed whatever $this->mLang->getCode() returns
 */
function getPreferredVariant() {
	$ownCode = $this->mLang->getCode();
	return $ownCode;
}
/**
 * Deliberately empty: both by-reference arguments are left untouched.
 *
 * @param mixed $l passed by reference, not modified
 * @param mixed $n passed by reference, not modified
 */
function findVariantLink(&$l, &$n) {
}
/**
 * This stub contributes no extra hash options.
 *
 * @return string always the empty string
 */
function getExtraHashOptions() {
	return '';
}
/**
 * This stub never produces a parsed title.
 *
 * @return string always the empty string
 */
function getParsedTitle() {
	return '';
}
/**
 * No-op: the text is returned without any marker added.
 *
 * @param string $text    text to pass through
 * @param bool   $noParse ignored by this stub
 * @return string $text, unchanged
 */
function markNoConversion($text, $noParse=false) {
	return $text;
}
/**
 * No-op: the category key is returned as given.
 *
 * @param string $key category key
 * @return string $key, unchanged
 */
function convertCategoryKey( $key ) {
	return $key;
}
/**
 * Map the wrapped language's own code to the unmodified text.
 *
 * @param string $text link text
 * @return array one entry: $this->mLang->getCode() => $text
 */
function convertLinkToAllVariants($text) {
	$ownCode = $this->mLang->getCode();
	return array( $ownCode => $text );
}
/**
 * No-op: the text is returned without any wrapping added.
 *
 * @param string $text text to pass through
 * @return string $text, unchanged
 */
function armourMath($text) {
	return $text;
}
56 #--------------------------------------------------------------------------
57 # Internationalisation code
58 #--------------------------------------------------------------------------
61 var $mConverter, $mVariants, $mCode, $mLoaded = false;
63 static public $mLocalisationKeys = array( 'fallback', 'namespaceNames',
64 'quickbarSettings', 'skinNames', 'mathNames',
65 'bookstoreList', 'magicWords', 'messages', 'rtl', 'digitTransformTable',
66 'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension',
67 'defaultUserOptionOverrides', 'linkTrail', 'namespaceAliases',
68 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap',
69 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases' );
71 static public $mMergeableMapKeys = array( 'messages', 'namespaceNames', 'mathNames',
72 'dateFormats', 'defaultUserOptionOverrides', 'magicWords' );
74 static public $mMergeableListKeys = array( 'extraUserToggles' );
76 static public $mMergeableAliasListKeys = array( 'specialPageAliases' );
78 static public $mLocalisationCache = array();
80 static public $mWeekdayMsgs = array(
81 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
85 static public $mWeekdayAbbrevMsgs = array(
86 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
89 static public $mMonthMsgs = array(
90 'january', 'february', 'march', 'april', 'may_long', 'june',
91 'july', 'august', 'september', 'october', 'november',
94 static public $mMonthGenMsgs = array(
95 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
96 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
99 static public $mMonthAbbrevMsgs = array(
100 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
101 'sep', 'oct', 'nov', 'dec'
105 * Create a language object for a given language code
107 static function factory( $code ) {
109 static $recursionLevel = 0;
111 if ( $code == 'en' ) {
114 $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
115 // Preload base classes to work around APC/PHP5 bug
116 if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
117 include_once("$IP/languages/classes/$class.deps.php");
119 if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
120 include_once("$IP/languages/classes/$class.php");
124 if ( $recursionLevel > 5 ) {
125 throw new MWException( "Language fallback loop detected when creating class $class\n" );
128 if( ! class_exists( $class ) ) {
129 $fallback = Language
::getFallbackFor( $code );
131 $lang = Language
::factory( $fallback );
133 $lang->setCode( $code );
141 function __construct() {
142 $this->mConverter
= new FakeConverter($this);
143 // Set the code to the name of the descendant
144 if ( get_class( $this ) == 'Language' ) {
147 $this->mCode
= str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
152 * Hook which will be called if this is the content language.
153 * Descendants can use this to register hook functions or modify globals
/**
 * Intentionally empty in this class; does nothing and returns nothing.
 */
function initContLang() {
}
161 function getDefaultUserOptions() {
162 return User
::getDefaultOptions();
166 * Exports $wgBookstoreListEn
169 function getBookstoreList() {
171 return $this->bookstoreList
;
177 function getNamespaces() {
179 return $this->namespaceNames
;
183 * A convenience function that returns the same thing as
184 * getNamespaces() except with the array values changed to ' '
185 * where it found '_', useful for producing output to be displayed
186 * e.g. in <select> forms.
190 function getFormattedNamespaces() {
191 $ns = $this->getNamespaces();
192 foreach($ns as $k => $v) {
193 $ns[$k] = strtr($v, '_', ' ');
199 * Get a namespace value by key
201 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
202 * echo $mw_ns; // prints 'MediaWiki'
205 * @param int $index the array key of the namespace to return
206 * @return mixed, string if the namespace value exists, otherwise false
208 function getNsText( $index ) {
209 $ns = $this->getNamespaces();
210 return isset( $ns[$index] ) ?
$ns[$index] : false;
214 * A convenience function that returns the same thing as
215 * getNsText() except with '_' changed to ' ', useful for
220 function getFormattedNsText( $index ) {
221 $ns = $this->getNsText( $index );
222 return strtr($ns, '_', ' ');
226 * Get a namespace key by value, case insensitive.
228 * @param string $text
229 * @return mixed An integer if $text is a valid value otherwise false
231 function getNsIndex( $text ) {
233 $lctext = $this->lc($text);
234 return isset( $this->mNamespaceIds
[$lctext] ) ?
$this->mNamespaceIds
[$lctext] : false;
238 * short names for language variants used for language conversion links.
240 * @param string $code
243 function getVariantname( $code ) {
244 return $this->getMessageFromDB( "variantname-$code" );
247 function specialPage( $name ) {
248 $aliases = $this->getSpecialPageAliases();
249 if ( isset( $aliases[$name][0] ) ) {
250 $name = $aliases[$name][0];
252 return $this->getNsText(NS_SPECIAL
) . ':' . $name;
255 function getQuickbarSettings() {
257 return $this->quickbarSettings
;
260 function getSkinNames() {
262 return $this->skinNames
;
265 function getMathNames() {
267 return $this->mathNames
;
270 function getDatePreferences() {
272 return $this->datePreferences
;
275 function getDateFormats() {
277 return $this->dateFormats
;
280 function getDefaultDateFormat() {
282 return $this->defaultDateFormat
;
285 function getDatePreferenceMigrationMap() {
287 return $this->datePreferenceMigrationMap
;
290 function getDefaultUserOptionOverrides() {
292 return $this->defaultUserOptionOverrides
;
295 function getExtraUserToggles() {
297 return $this->extraUserToggles
;
300 function getUserToggle( $tog ) {
301 return $this->getMessageFromDB( "tog-$tog" );
305 * Get language names, indexed by code.
306 * If $customisedOnly is true, only returns codes with a messages file
308 public static function getLanguageNames( $customisedOnly = false ) {
309 global $wgLanguageNames;
310 if ( !$customisedOnly ) {
311 return $wgLanguageNames;
315 $messageFiles = glob( "$IP/languages/messages/Messages*.php" );
317 foreach ( $messageFiles as $file ) {
319 if( preg_match( '/Messages([A-Z][a-z_]+)\.php$/', $file, $m ) ) {
320 $code = str_replace( '_', '-', strtolower( $m[1] ) );
321 if ( isset( $wgLanguageNames[$code] ) ) {
322 $names[$code] = $wgLanguageNames[$code];
330 * Ugly hack to get a message maybe from the MediaWiki namespace, if this
331 * language object is the content or user language.
333 function getMessageFromDB( $msg ) {
334 global $wgContLang, $wgLang;
335 if ( $wgContLang->getCode() == $this->getCode() ) {
337 return wfMsgForContent( $msg );
338 } elseif ( $wgLang->getCode() == $this->getCode() ) {
340 return wfMsg( $msg );
342 # Neither, get from localisation
343 return $this->getMessage( $msg );
347 function getLanguageName( $code ) {
348 global $wgLanguageNames;
349 if ( ! array_key_exists( $code, $wgLanguageNames ) ) {
352 return $wgLanguageNames[$code];
355 function getMonthName( $key ) {
356 return $this->getMessageFromDB( self
::$mMonthMsgs[$key-1] );
359 function getMonthNameGen( $key ) {
360 return $this->getMessageFromDB( self
::$mMonthGenMsgs[$key-1] );
363 function getMonthAbbreviation( $key ) {
364 return $this->getMessageFromDB( self
::$mMonthAbbrevMsgs[$key-1] );
367 function getWeekdayName( $key ) {
368 return $this->getMessageFromDB( self
::$mWeekdayMsgs[$key-1] );
371 function getWeekdayAbbreviation( $key ) {
372 return $this->getMessageFromDB( self
::$mWeekdayAbbrevMsgs[$key-1] );
376 * Used by date() and time() to adjust the time output.
378 * @param int $ts the time in date('YmdHis') format
379 * @param mixed $tz adjust the time by this amount (default false,
380 * meaning the user's timecorrection setting is used)
383 function userAdjust( $ts, $tz = false ) {
384 global $wgUser, $wgLocalTZoffset;
387 $tz = $wgUser->getOption( 'timecorrection' );
390 # minutes and hours differences:
395 # Global offset in minutes.
396 if( isset($wgLocalTZoffset) ) {
397 $hrDiff = $wgLocalTZoffset %
60;
398 $minDiff = $wgLocalTZoffset - ($hrDiff * 60);
400 } elseif ( strpos( $tz, ':' ) !== false ) {
401 $tzArray = explode( ':', $tz );
402 $hrDiff = intval($tzArray[0]);
403 $minDiff = intval($hrDiff < 0 ?
-$tzArray[1] : $tzArray[1]);
405 $hrDiff = intval( $tz );
408 # No difference ? Return time unchanged
409 if ( 0 == $hrDiff && 0 == $minDiff ) { return $ts; }
411 # Generate an adjusted date
413 (int)substr( $ts, 8, 2) ) +
$hrDiff, # Hours
414 (int)substr( $ts, 10, 2 ) +
$minDiff, # Minutes
415 (int)substr( $ts, 12, 2 ), # Seconds
416 (int)substr( $ts, 4, 2 ), # Month
417 (int)substr( $ts, 6, 2 ), # Day
418 (int)substr( $ts, 0, 4 ) ); #Year
419 return date( 'YmdHis', $t );
423 * This is a workalike of PHP's date() function, but with better
424 * internationalisation, a reduced set of format characters, and a better
427 * Supported format characters are dDjlNwzWFmMntLYyaAgGhHiscrU. See the
428 * PHP manual for definitions. There are a number of extensions, which
431 * xn Do not translate digits of the next numeric format character
432 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
433 * xr Use roman numerals for the next numeric format character
435 * xg Genitive month name
437 * Characters enclosed in double quotes will be considered literal (with
438 * the quotes themselves removed). Unmatched quotes will be considered
439 * literal quotes. Example:
441 * "The month is" F => The month is January
444 * Backslash escaping is also supported.
446 * @param string $format
447 * @param string $ts 14-character timestamp
451 function sprintfDate( $format, $ts ) {
457 for ( $p = 0; $p < strlen( $format ); $p++
) {
460 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
461 $code .= $format[++
$p];
472 $rawToggle = !$rawToggle;
478 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
481 $num = substr( $ts, 6, 2 );
484 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
485 $s .= $this->getWeekdayAbbreviation( date( 'w', $unix ) +
1 );
488 $num = intval( substr( $ts, 6, 2 ) );
491 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
492 $s .= $this->getWeekdayName( date( 'w', $unix ) +
1 );
495 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
496 $w = date( 'w', $unix );
500 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
501 $num = date( 'w', $unix );
504 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
505 $num = date( 'z', $unix );
508 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
509 $num = date( 'W', $unix );
512 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
515 $num = substr( $ts, 4, 2 );
518 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
521 $num = intval( substr( $ts, 4, 2 ) );
524 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
525 $num = date( 't', $unix );
528 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
529 $num = date( 'L', $unix );
532 $num = substr( $ts, 0, 4 );
535 $num = substr( $ts, 2, 2 );
538 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
541 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
544 $h = substr( $ts, 8, 2 );
545 $num = $h %
12 ?
$h %
12 : 12;
548 $num = intval( substr( $ts, 8, 2 ) );
551 $h = substr( $ts, 8, 2 );
552 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
555 $num = substr( $ts, 8, 2 );
558 $num = substr( $ts, 10, 2 );
561 $num = substr( $ts, 12, 2 );
564 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
565 $s .= date( 'c', $unix );
568 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
569 $s .= date( 'r', $unix );
572 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
577 if ( $p < strlen( $format ) - 1 ) {
585 if ( $p < strlen( $format ) - 1 ) {
586 $endQuote = strpos( $format, '"', $p +
1 );
587 if ( $endQuote === false ) {
588 # No terminating quote, assume literal "
591 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
595 # Quote at end of string, assume literal "
602 if ( $num !== false ) {
603 if ( $rawToggle ||
$raw ) {
606 } elseif ( $roman ) {
607 $s .= self
::romanNumeral( $num );
610 $s .= $this->formatNum( $num, true );
619 * Roman number formatting up to 3000
621 static function romanNumeral( $num ) {
622 static $table = array(
623 array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
624 array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
625 array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
626 array( '', 'M', 'MM', 'MMM' )
629 $num = intval( $num );
630 if ( $num > 3000 ||
$num <= 0 ) {
635 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
636 if ( $num >= $pow10 ) {
637 $s .= $table[$i][floor($num / $pow10)];
639 $num = $num %
$pow10;
645 * This is meant to be used by time(), date(), and timeanddate() to get
646 * the date preference they're supposed to use, it should be used in
650 * function timeanddate([...], $format = true) {
651 * $datePreference = $this->dateFormat($format);
656 * @param mixed $usePrefs: if true, the user's preference is used
657 * if false, the site/language default is used
658 * if int/string, assumed to be a format.
661 function dateFormat( $usePrefs = true ) {
664 if( is_bool( $usePrefs ) ) {
666 $datePreference = $wgUser->getDatePreference();
668 $options = User
::getDefaultOptions();
669 $datePreference = (string)$options['date'];
672 $datePreference = (string)$usePrefs;
676 if( $datePreference == '' ) {
680 return $datePreference;
685 * @param mixed $ts the time format which needs to be turned into a
686 * date('YmdHis') format with wfTimestamp(TS_MW,$ts)
687 * @param bool $adj whether to adjust the time output according to the
688 * user configured offset ($timecorrection)
689 * @param mixed $format true to use user's date format preference
690 * @param string $timecorrection the time offset as returned by
691 * validateTimeZone() in Special:Preferences
694 function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
697 $ts = $this->userAdjust( $ts, $timecorrection );
700 $pref = $this->dateFormat( $format );
701 if( $pref == 'default' ||
!isset( $this->dateFormats
["$pref date"] ) ) {
702 $pref = $this->defaultDateFormat
;
704 return $this->sprintfDate( $this->dateFormats
["$pref date"], $ts );
709 * @param mixed $ts the time format which needs to be turned into a
710 * date('YmdHis') format with wfTimestamp(TS_MW,$ts)
711 * @param bool $adj whether to adjust the time output according to the
712 * user configured offset ($timecorrection)
713 * @param mixed $format true to use user's date format preference
714 * @param string $timecorrection the time offset as returned by
715 * validateTimeZone() in Special:Preferences
718 function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
721 $ts = $this->userAdjust( $ts, $timecorrection );
724 $pref = $this->dateFormat( $format );
725 if( $pref == 'default' ||
!isset( $this->dateFormats
["$pref time"] ) ) {
726 $pref = $this->defaultDateFormat
;
728 return $this->sprintfDate( $this->dateFormats
["$pref time"], $ts );
733 * @param mixed $ts the time format which needs to be turned into a
734 * date('YmdHis') format with wfTimestamp(TS_MW,$ts)
735 * @param bool $adj whether to adjust the time output according to the
736 * user configured offset ($timecorrection)
738 * @param mixed $format what format to return, if it's false output the
739 * default one (default true)
740 * @param string $timecorrection the time offset as returned by
741 * validateTimeZone() in Special:Preferences
744 function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false) {
747 $ts = $this->userAdjust( $ts, $timecorrection );
750 $pref = $this->dateFormat( $format );
751 if( $pref == 'default' ||
!isset( $this->dateFormats
["$pref both"] ) ) {
752 $pref = $this->defaultDateFormat
;
755 return $this->sprintfDate( $this->dateFormats
["$pref both"], $ts );
758 function getMessage( $key ) {
760 return isset( $this->messages
[$key] ) ?
$this->messages
[$key] : null;
763 function getAllMessages() {
765 return $this->messages
;
768 function iconv( $in, $out, $string ) {
769 # For most languages, this is a wrapper for iconv
770 return iconv( $in, $out, $string );
773 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
774 function ucwordbreaksCallbackAscii($matches){
775 return $this->ucfirst($matches[1]);
778 function ucwordbreaksCallbackMB($matches){
779 return mb_strtoupper($matches[0]);
782 function ucCallback($matches){
783 list( $wikiUpperChars ) = self
::getCaseMaps();
784 return strtr( $matches[1], $wikiUpperChars );
787 function lcCallback($matches){
788 list( , $wikiLowerChars ) = self
::getCaseMaps();
789 return strtr( $matches[1], $wikiLowerChars );
792 function ucwordsCallbackMB($matches){
793 return mb_strtoupper($matches[0]);
796 function ucwordsCallbackWiki($matches){
797 list( $wikiUpperChars ) = self
::getCaseMaps();
798 return strtr( $matches[0], $wikiUpperChars );
801 function ucfirst( $str ) {
802 return self
::uc( $str, true );
805 function uc( $str, $first = false ) {
806 if ( function_exists( 'mb_strtoupper' ) ) {
808 if ( self
::isMultibyte( $str ) ) {
809 return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
811 return ucfirst( $str );
814 return self
::isMultibyte( $str ) ?
mb_strtoupper( $str ) : strtoupper( $str );
817 if ( self
::isMultibyte( $str ) ) {
818 list( $wikiUpperChars ) = $this->getCaseMaps();
819 $x = $first ?
'^' : '';
820 return preg_replace_callback(
821 "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
822 array($this,"ucCallback"),
826 return $first ?
ucfirst( $str ) : strtoupper( $str );
831 function lcfirst( $str ) {
832 return self
::lc( $str, true );
835 function lc( $str, $first = false ) {
836 if ( function_exists( 'mb_strtolower' ) )
838 if ( self
::isMultibyte( $str ) )
839 return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
841 return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
843 return self
::isMultibyte( $str ) ?
mb_strtolower( $str ) : strtolower( $str );
845 if ( self
::isMultibyte( $str ) ) {
846 list( , $wikiLowerChars ) = self
::getCaseMaps();
847 $x = $first ?
'^' : '';
848 return preg_replace_callback(
849 "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
850 array($this,"lcCallback"),
854 return $first ?
strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
857 function isMultibyte( $str ) {
858 return (bool)preg_match( '/[\x80-\xff]/', $str );
861 function ucwords($str) {
862 if ( self
::isMultibyte( $str ) ) {
863 $str = self
::lc($str);
865 // regexp to find first letter in each word (i.e. after each space)
866 $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
868 // function to use to capitalize a single char
869 if ( function_exists( 'mb_strtoupper' ) )
870 return preg_replace_callback(
872 array($this,"ucwordsCallbackMB"),
876 return preg_replace_callback(
878 array($this,"ucwordsCallbackWiki"),
883 return ucwords( strtolower( $str ) );
886 # capitalize words at word breaks
887 function ucwordbreaks($str){
888 if (self
::isMultibyte( $str ) ) {
889 $str = self
::lc($str);
891 // since \b doesn't work for UTF-8, we explicitly define word break chars
892 $breaks= "[ \-\(\)\}\{\.,\?!]";
894 // find first letter after word break
895 $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
897 if ( function_exists( 'mb_strtoupper' ) )
898 return preg_replace_callback(
900 array($this,"ucwordbreaksCallbackMB"),
904 return preg_replace_callback(
906 array($this,"ucwordsCallbackWiki"),
911 return preg_replace_callback(
912 '/\b([\w\x80-\xff]+)\b/',
913 array($this,"ucwordbreaksCallbackAscii"),
918 * Return a case-folded representation of $s
920 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
921 * and $s2 are the same except for the case of their characters. It is not
922 * necessary for the value returned to make sense when displayed.
924 * Do *not* perform any other normalisation in this function. If a caller
925 * uses this function when it should be using a more general normalisation
926 * function, then fix the caller.
928 function caseFold( $s ) {
929 return $this->uc( $s );
932 function checkTitleEncoding( $s ) {
933 if( is_array( $s ) ) {
934 wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
936 # Check for non-UTF-8 URLs
937 $ishigh = preg_match( '/[\x80-\xff]/', $s);
938 if(!$ishigh) return $s;
940 $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
941 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
942 if( $isutf8 ) return $s;
944 return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
947 function fallback8bitEncoding() {
949 return $this->fallback8bitEncoding
;
953 * Some languages have special punctuation to strip out
954 * or characters which need to be converted for MySQL's
955 * indexing to grok it correctly. Make such changes here.
960 function stripForSearch( $string ) {
962 if ( $wgDBtype != 'mysql' ) {
966 # MySQL fulltext index doesn't grok utf-8, so we
967 # need to fold cases and convert to hex
969 wfProfileIn( __METHOD__
);
970 if( function_exists( 'mb_strtolower' ) ) {
972 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
973 "'U8' . bin2hex( \"$1\" )",
974 mb_strtolower( $string ) );
976 list( , $wikiLowerChars ) = self
::getCaseMaps();
978 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
979 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
982 wfProfileOut( __METHOD__
);
986 function convertForSearchResult( $termsArray ) {
987 # some languages, e.g. Chinese, need to do a conversion
988 # in order for search results to be displayed correctly
993 * Get the first character of a string.
998 function firstChar( $s ) {
1000 preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1001 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
1003 return isset( $matches[1] ) ?
$matches[1] : "";
1006 function initEncoding() {
1007 # Some languages may have an alternate char encoding option
1008 # (Esperanto X-coding, Japanese furigana conversion, etc)
1009 # If this language is used as the primary content language,
1010 # an override to the defaults can be set here on startup.
1013 function recodeForEdit( $s ) {
1014 # For some languages we'll want to explicitly specify
1015 # which characters make it into the edit box raw
1016 # or are converted in some way or another.
1017 # Note that if wgOutputEncoding is different from
1018 # wgInputEncoding, this text will be further converted
1019 # to wgOutputEncoding.
1020 global $wgEditEncoding;
1021 if( $wgEditEncoding == '' or
1022 $wgEditEncoding == 'UTF-8' ) {
1025 return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1029 function recodeInput( $s ) {
1030 # Take the previous into account.
1031 global $wgEditEncoding;
1032 if($wgEditEncoding != "") {
1033 $enc = $wgEditEncoding;
1037 if( $enc == 'UTF-8' ) {
1040 return $this->iconv( $enc, 'UTF-8', $s );
1045 * For right-to-left language support
1055 * A hidden direction mark (LRM or RLM), depending on the language direction
1059 function getDirMark() {
1060 return $this->isRTL() ?
"\xE2\x80\x8F" : "\xE2\x80\x8E";
1064 * An arrow, depending on the language direction
1068 function getArrow() {
1069 return $this->isRTL() ?
'←' : '→';
1073 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
1077 function linkPrefixExtension() {
1079 return $this->linkPrefixExtension
;
1082 function &getMagicWords() {
1084 return $this->magicWords
;
1087 # Fill a MagicWord object with data from here
1088 function getMagic( &$mw ) {
1089 if ( !isset( $this->mMagicExtensions
) ) {
1090 $this->mMagicExtensions
= array();
1091 wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions
, $this->getCode() ) );
1093 if ( isset( $this->mMagicExtensions
[$mw->mId
] ) ) {
1094 $rawEntry = $this->mMagicExtensions
[$mw->mId
];
1096 $magicWords =& $this->getMagicWords();
1097 if ( isset( $magicWords[$mw->mId
] ) ) {
1098 $rawEntry = $magicWords[$mw->mId
];
1100 # Fall back to English if local list is incomplete
1101 $magicWords =& Language
::getMagicWords();
1102 $rawEntry = $magicWords[$mw->mId
];
1106 if( !is_array( $rawEntry ) ) {
1107 error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
1109 $mw->mCaseSensitive
= $rawEntry[0];
1110 $mw->mSynonyms
= array_slice( $rawEntry, 1 );
1114 * Get special page names, as an associative array
1115 * case folded alias => real name
1117 function getSpecialPageAliases() {
1119 if ( !isset( $this->mExtendedSpecialPageAliases
) ) {
1120 $this->mExtendedSpecialPageAliases
= $this->specialPageAliases
;
1121 wfRunHooks( 'LangugeGetSpecialPageAliases',
1122 array( &$this->mExtendedSpecialPageAliases
, $this->getCode() ) );
1124 return $this->mExtendedSpecialPageAliases
;
1128 * Italic is unsuitable for some languages
1132 * @param string $text The text to be emphasized.
1135 function emphasize( $text ) {
1136 return "<em>$text</em>";
1140 * Normally we output all numbers in plain en_US style, that is
1141 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
1142 * point twohundredthirtyfive. However this is not suitable for all
1143 * languages, some such as Pakaran want ੨੯੩,੨੯੧.੨੩੫ and others such as
1144 * Icelandic just want to use commas instead of dots, and dots instead
1145 * of commas like "293.291,235".
1147 * An example of this function being called:
1149 * wfMsg( 'message', $wgLang->formatNum( $num ) )
1152 * See LanguageGu.php for the Gujarati implementation and
1153 * LanguageIs.php for the , => . and . => , implementation.
1155 * @todo check if it's viable to use localeconv() for the decimal
1158 * @param mixed $number the string to be formatted, should be an integer or
1159 * a floating point number.
1160 * @param bool $nocommafy Set to true for special numbers like dates
1163 function formatNum( $number, $nocommafy = false ) {
1164 global $wgTranslateNumerals;
1166 $number = $this->commafy($number);
1167 $s = $this->separatorTransformTable();
1168 if (!is_null($s)) { $number = strtr($number, $s); }
1171 if ($wgTranslateNumerals) {
1172 $s = $this->digitTransformTable();
1173 if (!is_null($s)) { $number = strtr($number, $s); }
1179 function parseFormattedNumber( $number ) {
1180 $s = $this->digitTransformTable();
1181 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1183 $s = $this->separatorTransformTable();
1184 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1186 $number = strtr( $number, array (',' => '') );
1191 * Adds commas to a given number
1196 function commafy($_) {
1197 return strrev((string)preg_replace('/(\d{3})(?=\d)(?!\d*\.)/','$1,',strrev($_)));
1200 function digitTransformTable() {
1202 return $this->digitTransformTable
;
1205 function separatorTransformTable() {
1207 return $this->separatorTransformTable
;
1212 * For the credit list in includes/Credits.php (action=credits)
1217 function listToText( $l ) {
1220 for ($i = $m; $i >= 0; $i--) {
1223 } else if ($i == $m - 1) {
1224 $s = $l[$i] . ' ' . $this->getMessageFromDB( 'and' ) . ' ' . $s;
1226 $s = $l[$i] . ', ' . $s;
1232 # Crop a string from the beginning or end to a certain number of bytes.
1233 # (Bytes are used because our storage has limited byte lengths for some
1234 # columns in the database.) Multibyte charsets will need to make sure that
1235 # only whole characters are included!
1237 # $length does not include the optional ellipsis.
1238 # If $length is negative, snip from the beginning
1239 function truncate( $string, $length, $ellipsis = "" ) {
1240 if( $length == 0 ) {
1243 if ( strlen( $string ) <= abs( $length ) ) {
1247 $string = substr( $string, 0, $length );
1248 $char = ord( $string[strlen( $string ) - 1] );
1250 if ($char >= 0xc0) {
1251 # We got the first byte only of a multibyte char; remove it.
1252 $string = substr( $string, 0, -1 );
1253 } elseif( $char >= 0x80 &&
1254 preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
1255 '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
1256 # We chopped in the middle of a character; remove it
1259 return $string . $ellipsis;
1261 $string = substr( $string, $length );
1262 $char = ord( $string[0] );
1263 if( $char >= 0x80 && $char < 0xc0 ) {
1264 # We chopped in the middle of a character; remove the whole thing
1265 $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
1267 return $ellipsis . $string;
1272 * Grammatical transformations, needed for inflected languages
1273 * Invoked by putting {{grammar:case|word}} in a message
1275 * @param string $word
1276 * @param string $case
1279 function convertGrammar( $word, $case ) {
1280 global $wgGrammarForms;
1281 if ( isset($wgGrammarForms['en'][$case][$word]) ) {
1282 return $wgGrammarForms['en'][$case][$word];
1288 * Plural form transformations, needed for some languages.
1289 * For example, there are 3 forms of plural in Russian and Polish,
1290 * depending on "count mod 10". See [[w:Plural]]
1291 * For English it is pretty simple.
1293 * Invoked by putting {{plural:count|wordform1|wordform2}}
1294 * or {{plural:count|wordform1|wordform2|wordform3}}
1296 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
1298 * @param integer $count
1299 * @param string $wordform1
1300 * @param string $wordform2
1301 * @param string $wordform3 (optional)
1302 * @param string $wordform4 (optional)
1303 * @param string $wordform5 (optional)
1306 function convertPlural( $count, $w1, $w2, $w3, $w4, $w5) {
1307 return ( $count == '1' ||
$count == '-1' ) ?
$w1 : $w2;
1311 * For translating expiry times
1312 * @param string The validated block time in English
1313 * @return Somehow translated block time
1314 * @see LanguageFi.php for example implementation
1316 function translateBlockExpiry( $str ) {
1318 $scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
1320 if ( $scBlockExpiryOptions == '-') {
1324 foreach (explode(',', $scBlockExpiryOptions) as $option) {
1325 if ( strpos($option, ":") === false )
1327 list($show, $value) = explode(":", $option);
1328 if ( strcmp ( $str, $value) == 0 )
1329 return '<span title="' . htmlspecialchars($str). '">' .
1330 htmlspecialchars( trim( $show ) ) . '</span>';
1337 * languages like Chinese need to be segmented in order for the diff
1340 * @param string $text
1343 function segmentForDiff( $text ) {
/**
 * Counterpart of segmentForDiff(): unsegment to show the result.
 * This implementation returns the text as given.
 *
 * @param string $text
 * @return string The text, unchanged
 */
function unsegmentForDiff( $text ) {
	$joined = $text;
	return $joined;
}
# Convert text to different variants of a language.
# Delegates to the converter object held in $this->mConverter.
function convert( $text, $isTitle = false ) {
	$converter = $this->mConverter;
	return $converter->convert( $text, $isTitle );
}
# Convert text from within Parser.
# The parser is taken by reference and handed on to the converter.
function parserConvert( $text, &$parser ) {
	$converter = $this->mConverter;
	return $converter->parserConvert( $text, $parser );
}
# Check if this is a language with variants, i.e. whether getVariants()
# reports more than one code.
function hasVariants(){
	$variantCount = count( $this->getVariants() );
	return $variantCount > 1;
}
# Put custom tags (e.g. -{ }-) around math to prevent conversion.
# The actual work is done by the converter object.
function armourMath($text){
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param string $text
 * @param bool $isTitle Passed through unchanged to convert()
 * @return string Converted text run through htmlspecialchars()
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
# Pass a category key to the converter object and return its result.
function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language,
 * as reported by the converter object.
 * See the sample implementation in LanguageZh.php
 *
 * @return array an array of language codes
 */
function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
# Ask the converter object for the preferred variant; $fromUser is handed
# on to it unchanged.
function getPreferredVariant( $fromUser = true ) {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant( $fromUser );
}
/**
 * If a language supports multiple variants, it is possible that a
 * link which does not exist in one variant actually exists in another
 * variant. This function tries to find it. See e.g. LanguageZh.php
 *
 * @param string $link the name of the link
 * @param mixed $nt the title object of the link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt ) {
	# Both arguments are references; the converter receives the same variables.
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt );
}
/**
 * If a language supports multiple variants, converts text
 * into an array of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * @param string $text
 * @return array Whatever the converter object returns for $text
 */
function convertLinkToAllVariants($text){
	$converter = $this->mConverter;
	return $converter->convertLinkToAllVariants( $text );
}
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant.
 * The value comes straight from the converter object.
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an
 * article may be displayed differently in different variants. This
 * function returns the appropriate title defined in the body of the
 * article, as reported by the converter object.
 */
function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse handed on to the converter unchanged
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse=false ) {
	$converter = $this->mConverter;
	return $converter->markNoConversion( $text, $noParse );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 *
 * @return string The linkTrail property, after localisation data is loaded
 */
function linkTrail() {
	# The property is filled in from the localisation data
	$this->load();
	$trail = $this->linkTrail;
	return $trail;
}
# Return this language object itself.
function getLangObj() {
	$self = $this;
	return $self;
}
/**
 * Get the RFC 3066 code for this language object
 *
 * @return string The value previously stored with setCode()
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
# Store the language code that getCode() will return.
function setCode( $code ) {
	$this->mCode = $code;
}
# Build a file name from a language code: the first character of the code
# is upper-cased and every '-' becomes '_', then the prefix and suffix are
# attached, e.g. ( 'Language', 'zh-cn', '.php' ) gives 'LanguageZh_cn.php'.
static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
	$capitalised = ucfirst( $code );
	$basename = str_replace( '-', '_', $capitalised );
	return $prefix . $basename . $suffix;
}
# Path of the Messages source file for a language code, under
# $IP/languages/messages/.
static function getMessagesFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/messages/Messages";
	return self::getFileName( $prefix, $code, '.php' );
}
# Path of the Language class file for a language code, under
# $IP/languages/classes/.
static function getClassFileName( $code ) {
	global $IP;
	$prefix = "$IP/languages/classes/Language";
	return self::getFileName( $prefix, $code, '.php' );
}
# Load the localisation for $code (optionally bypassing the caches) and
# return its entry from the static per-process cache.
static function getLocalisationArray( $code, $disableCache = false ) {
	self::loadLocalisation( $code, $disableCache );
	$localisation = self::$mLocalisationCache[$code];
	return $localisation;
}
/**
 * Load localisation data for a given code into the static cache.
 *
 * Unless $disableCache is set, three caches are consulted in order: the
 * per-process cache, the precompiled .ser data file and the $wgMemc
 * entry (which is deleted when it is out of date). Failing those, the
 * Messages source file is included and merged with the data of its
 * fallback language, and the result is stored in the per-process cache
 * and, unless $disableCache is set, in $wgMemc.
 *
 * The local variable names in this function are significant: the Messages
 * file is included into this scope and its variables are collected with
 * compact(), so locals must not be renamed to a localisation key.
 *
 * @param string $code Language code
 * @param bool $disableCache Skip the cache lookups and the $wgMemc write
 * @return array Dependencies, map of filenames to mtimes
 */
static function loadLocalisation( $code, $disableCache = false ) {
	static $recursionGuard = array();
	global $wgMemc;

	if ( !$code ) {
		throw new MWException( "Invalid language code requested" );
	}

	if ( $disableCache ) {
		wfProfileIn( __METHOD__ );
	} else {
		# Try the per-process cache
		if ( isset( self::$mLocalisationCache[$code] ) ) {
			return self::$mLocalisationCache[$code]['deps'];
		}

		wfProfileIn( __METHOD__ );

		# Try the serialized directory
		$cache = wfGetPrecompiledData( self::getFileName( "Messages", $code, '.ser' ) );
		if ( $cache ) {
			self::$mLocalisationCache[$code] = $cache;
			wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
			wfProfileOut( __METHOD__ );
			return self::$mLocalisationCache[$code]['deps'];
		}

		# Try the global cache
		$memcKey = wfMemcKey('localisation', $code );
		$cache = $wgMemc->get( $memcKey );
		if ( $cache ) {
			# Check file modification times. This loop only positions
			# $file on the first stale dependency for the debug message.
			foreach ( $cache['deps'] as $file => $mtime ) {
				if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
					break;
				}
			}
			if ( !self::isLocalisationOutOfDate( $cache ) ) {
				self::$mLocalisationCache[$code] = $cache;
				wfDebug( "Language::loadLocalisation(): got localisation for $code from cache\n" );
				wfProfileOut( __METHOD__ );
				return $cache['deps'];
			}
			$wgMemc->delete( $memcKey );
			$cache = false;
			wfDebug( "Language::loadLocalisation(): localisation cache for $code had expired due to update of $file\n" );
		}
	}

	# Default fallback, may be overridden when the messages file is included
	$fallback = ( $code != 'en' ) ? 'en' : false;

	# Load the primary localisation from the source file
	$filename = self::getMessagesFileName( $code );
	if ( file_exists( $filename ) ) {
		$deps = array( $filename => filemtime( $filename ) );
		require( $filename );
		# Collect whichever localisation variables the file defined
		$cache = compact( self::$mLocalisationKeys );
		wfDebug( "Language::loadLocalisation(): got localisation for $code from source\n" );
	} else {
		wfDebug( "Language::loadLocalisation(): no localisation file for $code, using implicit fallback to en\n" );
		$cache = array();
		$deps = array();
	}

	if ( !empty( $fallback ) ) {
		# Load the fallback localisation, with a circular reference guard
		if ( isset( $recursionGuard[$code] ) ) {
			throw new MWException( "Error: Circular fallback reference in language code $code" );
		}
		$recursionGuard[$code] = true;
		$newDeps = self::loadLocalisation( $fallback, $disableCache );
		unset( $recursionGuard[$code] );

		$secondary = self::$mLocalisationCache[$fallback];
		$deps = array_merge( $deps, $newDeps );

		# Merge the fallback localisation with the current localisation
		foreach ( self::$mLocalisationKeys as $key ) {
			if ( !isset( $cache[$key] ) ) {
				# Nothing defined locally: inherit the fallback's value
				$cache[$key] = $secondary[$key];
				continue;
			}
			if ( !isset( $secondary[$key] ) ) {
				continue;
			}
			if ( in_array( $key, self::$mMergeableMapKeys ) ) {
				# Array union: local entries win over fallback entries
				$cache[$key] = $cache[$key] + $secondary[$key];
			} elseif ( in_array( $key, self::$mMergeableListKeys ) ) {
				$cache[$key] = array_merge( $secondary[$key], $cache[$key] );
			} elseif ( in_array( $key, self::$mMergeableAliasListKeys ) ) {
				$cache[$key] = array_merge_recursive( $cache[$key], $secondary[$key] );
			}
		}

		# Merge bookstore lists if requested
		if ( !empty( $cache['bookstoreList']['inherit'] ) ) {
			$cache['bookstoreList'] = array_merge( $cache['bookstoreList'], $secondary['bookstoreList'] );
		}
		if ( isset( $cache['bookstoreList']['inherit'] ) ) {
			unset( $cache['bookstoreList']['inherit'] );
		}
	}

	# Add dependencies to the cache entry
	$cache['deps'] = $deps;

	# Replace spaces with underscores in namespace names
	$cache['namespaceNames'] = str_replace( ' ', '_', $cache['namespaceNames'] );

	# Save to both caches
	self::$mLocalisationCache[$code] = $cache;
	if ( !$disableCache ) {
		$wgMemc->set( $memcKey, $cache );
	}

	wfProfileOut( __METHOD__ );
	return $deps;
}
/**
 * Test if a given localisation cache is out of date with respect to the
 * source Messages files. This is done automatically for the global cache
 * in $wgMemc.
 *
 * A cache counts as out of date when any file listed in its 'deps' map is
 * missing or has a modification time later than the recorded one.
 *
 * @param $cache mixed Either a language code or a cache array
 * @return bool
 */
static function isLocalisationOutOfDate( $cache ) {
	if ( !is_array( $cache ) ) {
		# A language code was given: load it and check its cache entry
		self::loadLocalisation( $cache );
		$cache = self::$mLocalisationCache[$cache];
	}
	foreach ( $cache['deps'] as $file => $mtime ) {
		if ( !file_exists( $file ) || filemtime( $file ) > $mtime ) {
			return true;
		}
	}
	return false;
}
/**
 * Get the fallback for a given language
 *
 * @param string $code Language code
 * @return mixed The 'fallback' entry of the loaded localisation data
 */
static function getFallbackFor( $code ) {
	self::loadLocalisation( $code );
	$localisation = self::$mLocalisationCache[$code];
	return $localisation['fallback'];
}
/**
 * Get all messages for a given language
 *
 * @param string $code Language code
 * @return mixed The 'messages' entry of the loaded localisation data
 */
static function getMessagesFor( $code ) {
	self::loadLocalisation( $code );
	$localisation = self::$mLocalisationCache[$code];
	return $localisation['messages'];
}
/**
 * Get a message for a given language
 *
 * @param string $key Message key
 * @param string $code Language code
 * @return mixed The message, or null when the key is not set
 */
static function getMessageFor( $key, $code ) {
	self::loadLocalisation( $code );
	if ( isset( self::$mLocalisationCache[$code]['messages'][$key] ) ) {
		return self::$mLocalisationCache[$code]['messages'][$key];
	}
	return null;
}
1689 * Load localisation data for this object
1692 if ( !$this->mLoaded
) {
1693 self
::loadLocalisation( $this->getCode() );
1694 $cache =& self
::$mLocalisationCache[$this->getCode()];
1695 foreach ( self
::$mLocalisationKeys as $key ) {
1696 $this->$key = $cache[$key];
1698 $this->mLoaded
= true;
1700 $this->fixUpSettings();
/**
 * Do any necessary post-cache-load settings adjustment.
 *
 * Applies the site configuration to the loaded namespace names, rebuilds
 * the lower-cased name => index lookup in $this->mNamespaceIds, and
 * resolves the 'dmy or mdy' date format placeholder.
 */
function fixUpSettings() {
	global $wgExtraNamespaces, $wgMetaNamespace, $wgMetaNamespaceTalk,
		$wgNamespaceAliases, $wgAmericanDates;
	wfProfileIn( __METHOD__ );

	# Array union: site-defined namespaces take precedence on equal IDs
	if ( $wgExtraNamespaces ) {
		$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames;
	}

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	if ( $wgMetaNamespaceTalk ) {
		$projectTalk = $wgMetaNamespaceTalk;
	} else {
		# Derive the talk name from the localised pattern
		$projectTalk = str_replace( '$1', $wgMetaNamespace,
			$this->namespaceNames[NS_PROJECT_TALK] );

		# Allow grammar transformations
		# Allowing full message-style parsing would make simple requests
		# such as action=raw much more expensive than they need to be.
		# This will hopefully cover most cases.
		$projectTalk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
			array( &$this, 'replaceGrammarInNamespace' ), $projectTalk );
		$projectTalk = str_replace( ' ', '_', $projectTalk );
	}
	$this->namespaceNames[NS_PROJECT_TALK] = $projectTalk;

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	# Later sources overwrite earlier ones on the same lower-cased name:
	# names first, then language aliases, then site aliases.
	$this->mNamespaceIds = array();
	foreach ( $this->namespaceNames as $nsIndex => $nsName ) {
		$this->mNamespaceIds[$this->lc($nsName)] = $nsIndex;
	}
	if ( $this->namespaceAliases ) {
		foreach ( $this->namespaceAliases as $aliasName => $aliasIndex ) {
			$this->mNamespaceIds[$this->lc($aliasName)] = $aliasIndex;
		}
	}
	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $siteAlias => $siteIndex ) {
			$this->mNamespaceIds[$this->lc($siteAlias)] = $siteIndex;
		}
	}

	if ( $this->defaultDateFormat == 'dmy or mdy' ) {
		if ( $wgAmericanDates ) {
			$this->defaultDateFormat = 'mdy';
		} else {
			$this->defaultDateFormat = 'dmy';
		}
	}
	wfProfileOut( __METHOD__ );
}
# preg_replace_callback() handler for {{grammar:case|word}} in namespace
# names: $m[1] is the case and $m[2] the word, both trimmed before use.
function replaceGrammarInNamespace( $m ) {
	$word = trim( $m[2] );
	$case = trim( $m[1] );
	return $this->convertGrammar( $word, $case );
}
1765 static function getCaseMaps() {
1766 static $wikiUpperChars, $wikiLowerChars;
1767 if ( isset( $wikiUpperChars ) ) {
1768 return array( $wikiUpperChars, $wikiLowerChars );
1771 wfProfileIn( __METHOD__
);
1772 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
1773 if ( $arr === false ) {
1774 throw new MWException(
1775 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
1778 wfProfileOut( __METHOD__
);
1779 return array( $wikiUpperChars, $wikiLowerChars );