7 if( !defined( 'MEDIAWIKI' ) ) {
8 echo "This file is part of MediaWiki, it is not a valid entry point.\n";
13 # In general you should not make customizations in these language files
14 # directly, but should use the MediaWiki: special namespace to customize
15 # user interface messages through the wiki.
16 # See http://meta.wikipedia.org/wiki/MediaWiki_namespace
18 # NOTE TO TRANSLATORS: Do not copy this whole file when making translations!
19 # A lot of common constants and a base class with inheritable methods are
20 # defined here, which should not be redefined. See the other LanguageXx.php
25 global $wgLanguageNames;
26 require_once( 'Names.php' );
28 global $wgInputEncoding, $wgOutputEncoding;
31 * These are always UTF-8, they exist only for backwards compatibility
33 $wgInputEncoding = "UTF-8";
34 $wgOutputEncoding = "UTF-8";
36 if( function_exists( 'mb_strtoupper' ) ) {
37 mb_internal_encoding('UTF-8');
40 /* a fake language converter */
/**
 * Remember the language object this pass-through converter reports on.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as a constructor, which would leave $mLang unset.
 *
 * @param object $langobj language object; its getCode() is used by the
 *                        variant accessors of this class
 */
function __construct( $langobj ) {
	$this->mLang = $langobj;
}

/**
 * Old-style constructor name, kept so explicit calls keep working.
 * Declared after __construct() on purpose: PHP 5 reports an E_STRICT
 * notice when __construct() follows a same-named method.
 *
 * @param object $langobj see __construct()
 */
function FakeConverter( $langobj ) {
	$this->__construct( $langobj );
}
/**
 * No-op conversion: the text is handed back untouched.
 *
 * @param string $t text to "convert"
 * @param mixed  $i ignored
 * @return string $t unchanged
 */
function convert( $t, $i ) {
	return $t;
}
/**
 * No-op conversion for text coming from the parser.
 *
 * @param string $t text to "convert"
 * @param mixed  $p ignored
 * @return string $t unchanged
 */
function parserConvert( $t, $p ) {
	return $t;
}
/**
 * The only "variant" is the wrapped language itself.
 *
 * @return array one-element list holding $this->mLang->getCode()
 */
function getVariants() {
	$code = $this->mLang->getCode();
	return array( $code );
}
/**
 * The preferred variant is always the wrapped language's own code.
 *
 * @param mixed $fromUser accepted so the signature matches the call made by
 *                        Language::getPreferredVariant(); not used here
 * @return mixed whatever $this->mLang->getCode() returns
 */
function getPreferredVariant( $fromUser = true ) {
	return $this->mLang->getCode();
}
/**
 * Intentionally does nothing: both by-reference arguments are left as they
 * were passed in.
 *
 * @param mixed $l passed by reference, not modified
 * @param mixed $n passed by reference, not modified
 */
function findVariantLink( &$l, &$n ) {
}
/**
 * @return string always the empty string
 */
function getExtraHashOptions() {
	return '';
}
/**
 * @return string always the empty string
 */
function getParsedTitle() {
	return '';
}
/**
 * No-op: returns the text without adding any markers.
 *
 * @param string $text
 * @param bool   $noParse ignored
 * @return string $text unchanged
 */
function markNoConversion( $text, $noParse = false ) {
	return $text;
}
/**
 * No-op: the category key is returned as given.
 *
 * @param string $key
 * @return string $key unchanged
 */
function convertCategoryKey( $key ) {
	return $key;
}
/**
 * Build the "all variants" map for a text; here it has a single entry.
 *
 * @param string $text
 * @return array map of $this->mLang->getCode() => $text
 */
function convertLinkToAllVariants( $text ) {
	$byVariant = array();
	$byVariant[$this->mLang->getCode()] = $text;
	return $byVariant;
}
/**
 * No-op: the text is returned without any protective markup.
 *
 * @param string $text
 * @return string $text unchanged
 */
function armourMath( $text ) {
	return $text;
}
57 #--------------------------------------------------------------------------
58 # Internationalisation code
59 #--------------------------------------------------------------------------
62 var $mConverter, $mVariants, $mCode, $mLoaded = false;
64 static public $mLocalisationKeys = array( 'fallback', 'namespaceNames',
65 'quickbarSettings', 'skinNames', 'mathNames',
66 'bookstoreList', 'magicWords', 'messages', 'rtl', 'digitTransformTable',
67 'separatorTransformTable', 'fallback8bitEncoding', 'linkPrefixExtension',
68 'defaultUserOptionOverrides', 'linkTrail', 'namespaceAliases',
69 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap',
70 'defaultDateFormat', 'extraUserToggles', 'specialPageAliases' );
72 static public $mMergeableMapKeys = array( 'messages', 'namespaceNames', 'mathNames',
73 'dateFormats', 'defaultUserOptionOverrides', 'magicWords' );
75 static public $mMergeableListKeys = array( 'extraUserToggles' );
77 static public $mMergeableAliasListKeys = array( 'specialPageAliases' );
79 static public $mLocalisationCache = array();
81 static public $mWeekdayMsgs = array(
82 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
86 static public $mWeekdayAbbrevMsgs = array(
87 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
90 static public $mMonthMsgs = array(
91 'january', 'february', 'march', 'april', 'may_long', 'june',
92 'july', 'august', 'september', 'october', 'november',
95 static public $mMonthGenMsgs = array(
96 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
97 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
100 static public $mMonthAbbrevMsgs = array(
101 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
102 'sep', 'oct', 'nov', 'dec'
106 * Create a language object for a given language code
108 static function factory( $code ) {
110 static $recursionLevel = 0;
112 if ( $code == 'en' ) {
115 $class = 'Language' . str_replace( '-', '_', ucfirst( $code ) );
116 // Preload base classes to work around APC/PHP5 bug
117 if ( file_exists( "$IP/languages/classes/$class.deps.php" ) ) {
118 include_once("$IP/languages/classes/$class.deps.php");
120 if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
121 include_once("$IP/languages/classes/$class.php");
125 if ( $recursionLevel > 5 ) {
126 throw new MWException( "Language fallback loop detected when creating class $class\n" );
129 if( ! class_exists( $class ) ) {
130 $fallback = Language
::getFallbackFor( $code );
132 $lang = Language
::factory( $fallback );
134 $lang->setCode( $code );
142 function __construct() {
143 $this->mConverter
= new FakeConverter($this);
144 // Set the code to the name of the descendant
145 if ( get_class( $this ) == 'Language' ) {
148 $this->mCode
= str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
153 * Hook which will be called if this is the content language.
154 * Descendants can use this to register hook functions or modify globals
/**
 * Empty by default; exists so descendants can override it.
 */
function initContLang() {
}
/**
 * Thin wrapper around User::getDefaultOptions().
 *
 * @return mixed whatever User::getDefaultOptions() returns
 */
function getDefaultUserOptions() {
	$defaults = User::getDefaultOptions();
	return $defaults;
}
167 * Exports $wgBookstoreListEn
170 function getBookstoreList() {
172 return $this->bookstoreList
;
178 function getNamespaces() {
180 return $this->namespaceNames
;
184 * A convenience function that returns the same thing as
185 * getNamespaces() except with the array values changed to ' '
186 * where it found '_', useful for producing output to be displayed
187 * e.g. in <select> forms.
191 function getFormattedNamespaces() {
192 $ns = $this->getNamespaces();
193 foreach($ns as $k => $v) {
194 $ns[$k] = strtr($v, '_', ' ');
200 * Get a namespace value by key
202 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
203 * echo $mw_ns; // prints 'MediaWiki'
206 * @param int $index the array key of the namespace to return
207 * @return mixed, string if the namespace value exists, otherwise false
/**
 * Look up one entry of getNamespaces() by its key.
 *
 * @param int $index key into the array returned by getNamespaces()
 * @return mixed the stored value, or false when the key is not set
 */
function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( !isset( $names[$index] ) ) {
		return false;
	}
	return $names[$index];
}
215 * A convenience function that returns the same thing as
216 * getNsText() except with '_' changed to ' ', useful for
/**
 * Same lookup as getNsText(), with every '_' replaced by a space.
 *
 * @param int $index key passed straight to getNsText()
 * @return string
 */
function getFormattedNsText( $index ) {
	return strtr( $this->getNsText( $index ), '_', ' ' );
}
227 * Get a namespace key by value, case insensitive.
229 * @param string $text
230 * @return mixed An integer if $text is a valid value otherwise false
232 function getNsIndex( $text ) {
234 $lctext = $this->lc($text);
235 return isset( $this->mNamespaceIds
[$lctext] ) ?
$this->mNamespaceIds
[$lctext] : false;
239 * short names for language variants used for language conversion links.
241 * @param string $code
/**
 * Fetch the message whose key is "variantname-" followed by the code.
 *
 * @param string $code
 * @return mixed result of getMessageFromDB()
 */
function getVariantname( $code ) {
	$msgKey = "variantname-$code";
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Build "<NS_SPECIAL namespace text>:<page name>".
 *
 * When getSpecialPageAliases() has a first alias for $name, that alias is
 * used as the page name; otherwise $name is used as given.
 *
 * @param string $name
 * @return string
 */
function specialPage( $name ) {
	$aliasTable = $this->getSpecialPageAliases();
	$pageName = isset( $aliasTable[$name][0] ) ? $aliasTable[$name][0] : $name;
	$nsText = $this->getNsText( NS_SPECIAL );
	return $nsText . ':' . $pageName;
}
256 function getQuickbarSettings() {
258 return $this->quickbarSettings
;
261 function getSkinNames() {
263 return $this->skinNames
;
266 function getMathNames() {
268 return $this->mathNames
;
271 function getDatePreferences() {
273 return $this->datePreferences
;
276 function getDateFormats() {
278 return $this->dateFormats
;
281 function getDefaultDateFormat() {
283 return $this->defaultDateFormat
;
286 function getDatePreferenceMigrationMap() {
288 return $this->datePreferenceMigrationMap
;
291 function getDefaultUserOptionOverrides() {
293 return $this->defaultUserOptionOverrides
;
296 function getExtraUserToggles() {
298 return $this->extraUserToggles
;
/**
 * Fetch the message whose key is "tog-" followed by the toggle name.
 *
 * @param string $tog
 * @return mixed result of getMessageFromDB()
 */
function getUserToggle( $tog ) {
	$msgKey = "tog-$tog";
	return $this->getMessageFromDB( $msgKey );
}
306 * Get language names, indexed by code.
307 * If $customisedOnly is true, only returns codes with a messages file
309 public static function getLanguageNames( $customisedOnly = false ) {
310 global $wgLanguageNames;
311 if ( !$customisedOnly ) {
312 return $wgLanguageNames;
316 $messageFiles = glob( "$IP/languages/messages/Messages*.php" );
318 foreach ( $messageFiles as $file ) {
320 if( preg_match( '/Messages([A-Z][a-z_]+)\.php$/', $file, $m ) ) {
321 $code = str_replace( '_', '-', strtolower( $m[1] ) );
322 if ( isset( $wgLanguageNames[$code] ) ) {
323 $names[$code] = $wgLanguageNames[$code];
331 * Ugly hack to get a message maybe from the MediaWiki namespace, if this
332 * language object is the content or user language.
/**
 * Fetch a message, choosing the lookup by comparing language codes:
 *  - same code as $wgContLang: wfMsgForContent()
 *  - same code as $wgLang:     wfMsg()
 *  - otherwise:                this object's own getMessage()
 *
 * @param string $msg message key
 * @return mixed whatever the selected lookup returns
 */
function getMessageFromDB( $msg ) {
	global $wgContLang, $wgLang;

	# Content language
	if ( $wgContLang->getCode() == $this->getCode() ) {
		return wfMsgForContent( $msg );
	}

	# User language
	if ( $wgLang->getCode() == $this->getCode() ) {
		return wfMsg( $msg );
	}

	# Neither, get from localisation
	return $this->getMessage( $msg );
}
348 function getLanguageName( $code ) {
349 global $wgLanguageNames;
350 if ( ! array_key_exists( $code, $wgLanguageNames ) ) {
353 return $wgLanguageNames[$code];
/**
 * Message for a month name.
 *
 * @param int $key 1-based position; entry $key-1 of self::$mMonthMsgs is used
 * @return mixed result of getMessageFromDB()
 */
function getMonthName( $key ) {
	$msgKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Message for a month name from the "-gen" key list.
 *
 * @param int $key 1-based position; entry $key-1 of self::$mMonthGenMsgs is used
 * @return mixed result of getMessageFromDB()
 */
function getMonthNameGen( $key ) {
	$msgKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Message for an abbreviated month name.
 *
 * @param int $key 1-based position; entry $key-1 of self::$mMonthAbbrevMsgs is used
 * @return mixed result of getMessageFromDB()
 */
function getMonthAbbreviation( $key ) {
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Message for a weekday name.
 *
 * @param int $key 1-based position in self::$mWeekdayMsgs, whose first
 *                 entry is 'sunday'
 * @return mixed result of getMessageFromDB()
 */
function getWeekdayName( $key ) {
	$msgKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Message for an abbreviated weekday name.
 *
 * @param int $key 1-based position in self::$mWeekdayAbbrevMsgs, whose
 *                 first entry is 'sun'
 * @return mixed result of getMessageFromDB()
 */
function getWeekdayAbbreviation( $key ) {
	$msgKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
377 * Used by date() and time() to adjust the time output.
379 * @param int $ts the time in date('YmdHis') format
380 * @param mixed $tz adjust the time by this amount (default false,
381 * mean we get user timecorrection setting)
384 function userAdjust( $ts, $tz = false ) {
385 global $wgUser, $wgLocalTZoffset;
388 $tz = $wgUser->getOption( 'timecorrection' );
391 # minutes and hours differences:
396 # Global offset in minutes.
397 if( isset($wgLocalTZoffset) ) {
398 $hrDiff = $wgLocalTZoffset %
60;
399 $minDiff = $wgLocalTZoffset - ($hrDiff * 60);
401 } elseif ( strpos( $tz, ':' ) !== false ) {
402 $tzArray = explode( ':', $tz );
403 $hrDiff = intval($tzArray[0]);
404 $minDiff = intval($hrDiff < 0 ?
-$tzArray[1] : $tzArray[1]);
406 $hrDiff = intval( $tz );
409 # No difference ? Return time unchanged
410 if ( 0 == $hrDiff && 0 == $minDiff ) { return $ts; }
412 # Generate an adjusted date
414 (int)substr( $ts, 8, 2) ) +
$hrDiff, # Hours
415 (int)substr( $ts, 10, 2 ) +
$minDiff, # Minutes
416 (int)substr( $ts, 12, 2 ), # Seconds
417 (int)substr( $ts, 4, 2 ), # Month
418 (int)substr( $ts, 6, 2 ), # Day
419 (int)substr( $ts, 0, 4 ) ); #Year
420 return date( 'YmdHis', $t );
424 * This is a workalike of PHP's date() function, but with better
425 * internationalisation, a reduced set of format characters, and a better
428 * Supported format characters are dDjlNwzWFmMntLYyaAgGhHiscrU. See the
429 * PHP manual for definitions. There are a number of extensions, which
432 * xn Do not translate digits of the next numeric format character
433 * xN Toggle raw digit (xn) flag, stays set until explicitly unset
434 * xr Use roman numerals for the next numeric format character
436 * xg Genitive month name
438 * Characters enclosed in double quotes will be considered literal (with
439 * the quotes themselves removed). Unmatched quotes will be considered
440 * literal quotes. Example:
442 * "The month is" F => The month is January
445 * Backslash escaping is also supported.
447 * @param string $format
448 * @param string $ts 14-character timestamp
452 function sprintfDate( $format, $ts ) {
458 for ( $p = 0; $p < strlen( $format ); $p++
) {
461 if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
462 $code .= $format[++
$p];
473 $rawToggle = !$rawToggle;
479 $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
482 $num = substr( $ts, 6, 2 );
485 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
486 $s .= $this->getWeekdayAbbreviation( date( 'w', $unix ) +
1 );
489 $num = intval( substr( $ts, 6, 2 ) );
492 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
493 $s .= $this->getWeekdayName( date( 'w', $unix ) +
1 );
496 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
497 $w = date( 'w', $unix );
501 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
502 $num = date( 'w', $unix );
505 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
506 $num = date( 'z', $unix );
509 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
510 $num = date( 'W', $unix );
513 $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
516 $num = substr( $ts, 4, 2 );
519 $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
522 $num = intval( substr( $ts, 4, 2 ) );
525 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
526 $num = date( 't', $unix );
529 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
530 $num = date( 'L', $unix );
533 $num = substr( $ts, 0, 4 );
536 $num = substr( $ts, 2, 2 );
539 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'am' : 'pm';
542 $s .= intval( substr( $ts, 8, 2 ) ) < 12 ?
'AM' : 'PM';
545 $h = substr( $ts, 8, 2 );
546 $num = $h %
12 ?
$h %
12 : 12;
549 $num = intval( substr( $ts, 8, 2 ) );
552 $h = substr( $ts, 8, 2 );
553 $num = sprintf( '%02d', $h %
12 ?
$h %
12 : 12 );
556 $num = substr( $ts, 8, 2 );
559 $num = substr( $ts, 10, 2 );
562 $num = substr( $ts, 12, 2 );
565 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
566 $s .= date( 'c', $unix );
569 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
570 $s .= date( 'r', $unix );
573 if ( !$unix ) $unix = wfTimestamp( TS_UNIX
, $ts );
578 if ( $p < strlen( $format ) - 1 ) {
586 if ( $p < strlen( $format ) - 1 ) {
587 $endQuote = strpos( $format, '"', $p +
1 );
588 if ( $endQuote === false ) {
589 # No terminating quote, assume literal "
592 $s .= substr( $format, $p +
1, $endQuote - $p - 1 );
596 # Quote at end of string, assume literal "
603 if ( $num !== false ) {
604 if ( $rawToggle ||
$raw ) {
607 } elseif ( $roman ) {
608 $s .= self
::romanNumeral( $num );
611 $s .= $this->formatNum( $num, true );
620 * Roman number formatting up to 3000
622 static function romanNumeral( $num ) {
623 static $table = array(
624 array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
625 array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
626 array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
627 array( '', 'M', 'MM', 'MMM' )
630 $num = intval( $num );
631 if ( $num > 3000 ||
$num <= 0 ) {
636 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
637 if ( $num >= $pow10 ) {
638 $s .= $table[$i][floor($num / $pow10)];
640 $num = $num %
$pow10;
646 * This is meant to be used by time(), date(), and timeanddate() to get
647 * the date preference they're supposed to use, it should be used in
651 * function timeanddate([...], $format = true) {
652 * $datePreference = $this->dateFormat($format);
657 * @param mixed $usePrefs: if true, the user's preference is used
658 * if false, the site/language default is used
659 * if int/string, assumed to be a format.
662 function dateFormat( $usePrefs = true ) {
665 if( is_bool( $usePrefs ) ) {
667 $datePreference = $wgUser->getDatePreference();
669 $options = User
::getDefaultOptions();
670 $datePreference = (string)$options['date'];
673 $datePreference = (string)$usePrefs;
677 if( $datePreference == '' ) {
681 return $datePreference;
686 * @param mixed $ts the time format which needs to be turned into a
687 * date('YmdHis') format with wfTimestamp(TS_MW,$ts)
688 * @param bool $adj whether to adjust the time output according to the
689 * user configured offset ($timecorrection)
690 * @param mixed $format true to use user's date format preference
691 * @param string $timecorrection the time offset as returned by
692 * validateTimeZone() in Special:Preferences
695 function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
698 $ts = $this->userAdjust( $ts, $timecorrection );
701 $pref = $this->dateFormat( $format );
702 if( $pref == 'default' ||
!isset( $this->dateFormats
["$pref date"] ) ) {
703 $pref = $this->defaultDateFormat
;
705 return $this->sprintfDate( $this->dateFormats
["$pref date"], $ts );
710 * @param mixed $ts the time format which needs to be turned into a
711 * date('YmdHis') format with wfTimestamp(TS_MW,$ts)
712 * @param bool $adj whether to adjust the time output according to the
713 * user configured offset ($timecorrection)
714 * @param mixed $format true to use user's date format preference
715 * @param string $timecorrection the time offset as returned by
716 * validateTimeZone() in Special:Preferences
719 function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
722 $ts = $this->userAdjust( $ts, $timecorrection );
725 $pref = $this->dateFormat( $format );
726 if( $pref == 'default' ||
!isset( $this->dateFormats
["$pref time"] ) ) {
727 $pref = $this->defaultDateFormat
;
729 return $this->sprintfDate( $this->dateFormats
["$pref time"], $ts );
734 * @param mixed $ts the time format which needs to be turned into a
735 * date('YmdHis') format with wfTimestamp(TS_MW,$ts)
736 * @param bool $adj whether to adjust the time output according to the
737 * user configured offset ($timecorrection)
739 * @param mixed $format what format to return, if it's false output the
740 * default one (default true)
741 * @param string $timecorrection the time offset as returned by
742 * validateTimeZone() in Special:Preferences
745 function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false) {
748 $ts = $this->userAdjust( $ts, $timecorrection );
751 $pref = $this->dateFormat( $format );
752 if( $pref == 'default' ||
!isset( $this->dateFormats
["$pref both"] ) ) {
753 $pref = $this->defaultDateFormat
;
756 return $this->sprintfDate( $this->dateFormats
["$pref both"], $ts );
759 function getMessage( $key ) {
761 return isset( $this->messages
[$key] ) ?
$this->messages
[$key] : null;
764 function getAllMessages() {
766 return $this->messages
;
/**
 * Character-set conversion hook; this implementation simply forwards to
 * PHP's iconv() with the arguments in the same order.
 *
 * @param string $in     source encoding
 * @param string $out    target encoding
 * @param string $string text to convert
 * @return mixed whatever PHP's iconv() returns
 */
function iconv( $in, $out, $string ) {
	$converted = iconv( $in, $out, $string );
	return $converted;
}
774 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * Regex callback: upper-case the first character of capture group 1 via
 * this object's ucfirst().
 *
 * @param array $matches regex match array
 * @return string
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];
	return $this->ucfirst( $word );
}
/**
 * Regex callback: upper-case the whole match with mb_strtoupper().
 *
 * @param array $matches regex match array
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
/**
 * Regex callback: translate capture group 1 through the first table
 * returned by getCaseMaps().
 *
 * @param array $matches regex match array
 * @return string
 */
function ucCallback( $matches ) {
	list( $upperTable ) = self::getCaseMaps();
	$piece = $matches[1];
	return strtr( $piece, $upperTable );
}
/**
 * Regex callback: translate capture group 1 through the second table
 * returned by getCaseMaps().
 *
 * @param array $matches regex match array
 * @return string
 */
function lcCallback( $matches ) {
	list( , $lowerTable ) = self::getCaseMaps();
	$piece = $matches[1];
	return strtr( $piece, $lowerTable );
}
/**
 * Regex callback: upper-case the whole match with mb_strtoupper().
 *
 * @param array $matches regex match array
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
	$whole = $matches[0];
	return mb_strtoupper( $whole );
}
/**
 * Regex callback: translate the whole match through the first table
 * returned by getCaseMaps().
 *
 * @param array $matches regex match array
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
	list( $upperTable ) = self::getCaseMaps();
	$whole = $matches[0];
	return strtr( $whole, $upperTable );
}
/**
 * Upper-case only the first character of a string.
 *
 * Dispatches through $this rather than self:: so that a descendant class
 * overriding uc() is honoured; self:: would always run the uc() of the
 * class this method is declared in.
 *
 * @param string $str
 * @return string result of uc() called with $first = true
 */
function ucfirst( $str ) {
	return $this->uc( $str, true );
}
806 function uc( $str, $first = false ) {
807 if ( function_exists( 'mb_strtoupper' ) ) {
809 if ( self
::isMultibyte( $str ) ) {
810 return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
812 return ucfirst( $str );
815 return self
::isMultibyte( $str ) ?
mb_strtoupper( $str ) : strtoupper( $str );
818 if ( self
::isMultibyte( $str ) ) {
819 list( $wikiUpperChars ) = $this->getCaseMaps();
820 $x = $first ?
'^' : '';
821 return preg_replace_callback(
822 "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
823 array($this,"ucCallback"),
827 return $first ?
ucfirst( $str ) : strtoupper( $str );
/**
 * Lower-case only the first character of a string.
 *
 * Dispatches through $this rather than self:: so that a descendant class
 * overriding lc() is honoured; self:: would always run the lc() of the
 * class this method is declared in.
 *
 * @param string $str
 * @return string result of lc() called with $first = true
 */
function lcfirst( $str ) {
	return $this->lc( $str, true );
}
836 function lc( $str, $first = false ) {
837 if ( function_exists( 'mb_strtolower' ) )
839 if ( self
::isMultibyte( $str ) )
840 return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
842 return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
844 return self
::isMultibyte( $str ) ?
mb_strtolower( $str ) : strtolower( $str );
846 if ( self
::isMultibyte( $str ) ) {
847 list( , $wikiLowerChars ) = self
::getCaseMaps();
848 $x = $first ?
'^' : '';
849 return preg_replace_callback(
850 "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
851 array($this,"lcCallback"),
855 return $first ?
strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
/**
 * Tell whether a string contains at least one byte in the range 0x80-0xFF.
 *
 * @param string $str
 * @return bool
 */
function isMultibyte( $str ) {
	$found = preg_match( '/[\x80-\xff]/', $str );
	return $found ? true : false;
}
862 function ucwords($str) {
863 if ( self
::isMultibyte( $str ) ) {
864 $str = self
::lc($str);
866 // regexp to find first letter in each word (i.e. after each space)
867 $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
869 // function to use to capitalize a single char
870 if ( function_exists( 'mb_strtoupper' ) )
871 return preg_replace_callback(
873 array($this,"ucwordsCallbackMB"),
877 return preg_replace_callback(
879 array($this,"ucwordsCallbackWiki"),
884 return ucwords( strtolower( $str ) );
887 # capitalize words at word breaks
888 function ucwordbreaks($str){
889 if (self
::isMultibyte( $str ) ) {
890 $str = self
::lc($str);
892 // since \b doesn't work for UTF-8, we explicitely define word break chars
893 $breaks= "[ \-\(\)\}\{\.,\?!]";
895 // find first letter after word break
896 $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
898 if ( function_exists( 'mb_strtoupper' ) )
899 return preg_replace_callback(
901 array($this,"ucwordbreaksCallbackMB"),
905 return preg_replace_callback(
907 array($this,"ucwordsCallbackWiki"),
912 return preg_replace_callback(
913 '/\b([\w\x80-\xff]+)\b/',
914 array($this,"ucwordbreaksCallbackAscii"),
919 * Return a case-folded representation of $s
921 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
922 * and $s2 are the same except for the case of their characters. It is not
923 * necessary for the value returned to make sense when displayed.
925 * Do *not* perform any other normalisation in this function. If a caller
926 * uses this function when it should be using a more general normalisation
927 * function, then fix the caller.
/**
 * Case-fold a string; this implementation folds by upper-casing with uc().
 *
 * @param string $s
 * @return string
 */
function caseFold( $s ) {
	$folded = $this->uc( $s );
	return $folded;
}
/**
 * Return $s as is when it is pure 7-bit or matches the UTF-8 byte pattern
 * below; otherwise convert it from fallback8bitEncoding() to "utf-8".
 *
 * @param string $s an array is reported through wfDebugDieBacktrace()
 * @return mixed $s itself, or the result of $this->iconv()
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
	}

	# Check for non-UTF-8 URLs: no high bytes means nothing to do
	if ( !preg_match( '/[\x80-\xff]/', $s ) ) {
		return $s;
	}

	$utf8Pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
	if ( preg_match( $utf8Pattern, $s ) ) {
		return $s;
	}

	return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
}
948 function fallback8bitEncoding() {
950 return $this->fallback8bitEncoding
;
954 * Some languages have special punctuation to strip out
955 * or characters which need to be converted for MySQL's
956 * indexing to grok it correctly. Make such changes here.
961 function stripForSearch( $string ) {
963 if ( $wgDBtype != 'mysql' ) {
967 # MySQL fulltext index doesn't grok utf-8, so we
968 # need to fold cases and convert to hex
970 wfProfileIn( __METHOD__
);
971 if( function_exists( 'mb_strtolower' ) ) {
973 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
974 "'U8' . bin2hex( \"$1\" )",
975 mb_strtolower( $string ) );
977 list( , $wikiLowerChars ) = self
::getCaseMaps();
979 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
980 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
983 wfProfileOut( __METHOD__
);
987 function convertForSearchResult( $termsArray ) {
988 # some languages, e.g. Chinese, need to do a conversion
989 # in order for search results to be displayed correctly
994 * Get the first character of a string.
999 function firstChar( $s ) {
1001 preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1002 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
1004 return isset( $matches[1] ) ?
$matches[1] : "";
/**
 * Startup hook with an empty default body.
 *
 * Some languages may have an alternate char encoding option
 * (Esperanto X-coding, Japanese furigana conversion, etc).
 * If this language is used as the primary content language,
 * an override to the defaults can be set here on startup.
 */
function initEncoding() {
}
1014 function recodeForEdit( $s ) {
1015 # For some languages we'll want to explicitly specify
1016 # which characters make it into the edit box raw
1017 # or are converted in some way or another.
1018 # Note that if wgOutputEncoding is different from
1019 # wgInputEncoding, this text will be further converted
1020 # to wgOutputEncoding.
1021 global $wgEditEncoding;
1022 if( $wgEditEncoding == '' or
1023 $wgEditEncoding == 'UTF-8' ) {
1026 return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
1030 function recodeInput( $s ) {
1031 # Take the previous into account.
1032 global $wgEditEncoding;
1033 if($wgEditEncoding != "") {
1034 $enc = $wgEditEncoding;
1038 if( $enc == 'UTF-8' ) {
1041 return $this->iconv( $enc, 'UTF-8', $s );
1046 * For right-to-left language support
1056 * A hidden direction mark (LRM or RLM), depending on the language direction
/**
 * Hidden direction mark matching isRTL().
 *
 * @return string bytes E2 80 8F when isRTL() is true, else E2 80 8E
 */
function getDirMark() {
	if ( $this->isRTL() ) {
		return "\xE2\x80\x8F";
	}
	return "\xE2\x80\x8E";
}
1065 * An arrow, depending on the language direction
/**
 * Arrow character matching isRTL().
 *
 * @return string '←' when isRTL() is true, else '→'
 */
function getArrow() {
	if ( $this->isRTL() ) {
		return '←';
	}
	return '→';
}
1074 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
1078 function linkPrefixExtension() {
1080 return $this->linkPrefixExtension
;
1083 function &getMagicWords() {
1085 return $this->magicWords
;
1088 # Fill a MagicWord object with data from here
/**
 * Fill a MagicWord object with data from here.
 *
 * The entry for $mw->mId is taken from the first place that has it:
 * the 'LanguageGetMagic' hook results (collected once per object), then
 * getMagicWords(), then Language::getMagicWords(). Its first element is
 * stored in $mw->mCaseSensitive and the remaining ones in $mw->mSynonyms.
 * A non-array entry is reported through error_log() and processing
 * continues.
 *
 * @param object $mw object exposing mId, mCaseSensitive and mSynonyms
 */
function getMagic( &$mw ) {
	# Run the extension hook only the first time through
	if ( !isset( $this->mMagicExtensions ) ) {
		$this->mMagicExtensions = array();
		wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
	}

	if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId];
	} else {
		$wordTable =& $this->getMagicWords();
		if ( !isset( $wordTable[$mw->mId] ) ) {
			# Fall back to English if local list is incomplete
			$wordTable =& Language::getMagicWords();
		}
		$rawEntry = $wordTable[$mw->mId];
	}

	if ( !is_array( $rawEntry ) ) {
		error_log( "\"$rawEntry\" is not a valid magic thingie for \"$mw->mId\"" );
	}

	$mw->mCaseSensitive = $rawEntry[0];
	$mw->mSynonyms = array_slice( $rawEntry, 1 );
}
1115 * Get special page names, as an associative array
1116 * case folded alias => real name
1118 function getSpecialPageAliases() {
1120 if ( !isset( $this->mExtendedSpecialPageAliases
) ) {
1121 $this->mExtendedSpecialPageAliases
= $this->specialPageAliases
;
1122 wfRunHooks( 'LangugeGetSpecialPageAliases',
1123 array( &$this->mExtendedSpecialPageAliases
, $this->getCode() ) );
1125 return $this->mExtendedSpecialPageAliases
;
1129 * Italic is unsuitable for some languages
1133 * @param string $text The text to be emphasized.
/**
 * Wrap text in an <em> element. The text is inserted as given, without
 * escaping.
 *
 * @param string $text The text to be emphasized.
 * @return string
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
1141 * Normally we output all numbers in plain en_US style, that is
1142 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
1143 * point twohundredthirtyfive. However this is not suitable for all
1144 * languages, some such as Pakaran want ੨੯੩,੨੯੧.੨੩੫ and others such as
1145 * Icelandic just want to use commas instead of dots, and dots instead
1146 * of commas like "293.291,235".
1148 * An example of this function being called:
1150 * wfMsg( 'message', $wgLang->formatNum( $num ) )
1153 * See LanguageGu.php for the Gujarati implementation and
1154 * LanguageIs.php for the , => . and . => , implementation.
1156 * @todo check if it's viable to use localeconv() for the decimal
1159 * @param mixed $number the string to be formatted, should be an integer or
1160 * a floating point number.
1161 * @param bool $nocommafy Set to true for special numbers like dates
1164 function formatNum( $number, $nocommafy = false ) {
1165 global $wgTranslateNumerals;
1167 $number = $this->commafy($number);
1168 $s = $this->separatorTransformTable();
1169 if (!is_null($s)) { $number = strtr($number, $s); }
1172 if ($wgTranslateNumerals) {
1173 $s = $this->digitTransformTable();
1174 if (!is_null($s)) { $number = strtr($number, $s); }
1180 function parseFormattedNumber( $number ) {
1181 $s = $this->digitTransformTable();
1182 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1184 $s = $this->separatorTransformTable();
1185 if (!is_null($s)) { $number = strtr($number, array_flip($s)); }
1187 $number = strtr( $number, array (',' => '') );
1192 * Adds commas to a given number
/**
 * Insert a comma after every third digit, counting from the right.
 *
 * The work is done on the reversed string; the negative lookahead keeps
 * digit runs that are followed by a '.' in the reversed string (the
 * fractional part) free of commas.
 *
 * @param mixed $_ the number to format
 * @return string
 */
function commafy( $_ ) {
	$reversed = strrev( $_ );
	$grouped = preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', $reversed );
	return strrev( (string)$grouped );
}
1201 function digitTransformTable() {
1203 return $this->digitTransformTable
;
1206 function separatorTransformTable() {
1208 return $this->separatorTransformTable
;
1213 * For the credit list in includes/Credits.php (action=credits)
1218 function listToText( $l ) {
1221 for ($i = $m; $i >= 0; $i--) {
1224 } else if ($i == $m - 1) {
1225 $s = $l[$i] . ' ' . $this->getMessageFromDB( 'and' ) . ' ' . $s;
1227 $s = $l[$i] . ', ' . $s;
1233 # Crop a string from the beginning or end to a certain number of bytes.
1234 # (Bytes are used because our storage has limited byte lengths for some
1235 # columns in the database.) Multibyte charsets will need to make sure that
1236 # only whole characters are included!
1238 # $length does not include the optional ellipsis.
1239 # If $length is negative, snip from the beginning
1240 function truncate( $string, $length, $ellipsis = "" ) {
1241 if( $length == 0 ) {
1244 if ( strlen( $string ) <= abs( $length ) ) {
1248 $string = substr( $string, 0, $length );
1249 $char = ord( $string[strlen( $string ) - 1] );
1251 if ($char >= 0xc0) {
1252 # We got the first byte only of a multibyte char; remove it.
1253 $string = substr( $string, 0, -1 );
1254 } elseif( $char >= 0x80 &&
1255 preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
1256 '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
1257 # We chopped in the middle of a character; remove it
1260 return $string . $ellipsis;
1262 $string = substr( $string, $length );
1263 $char = ord( $string[0] );
1264 if( $char >= 0x80 && $char < 0xc0 ) {
1265 # We chopped in the middle of a character; remove the whole thing
1266 $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
1268 return $ellipsis . $string;
1273 * Grammatical transformations, needed for inflected languages
1274 * Invoked by putting {{grammar:case|word}} in a message
1276 * @param string $word
1277 * @param string $case
1280 function convertGrammar( $word, $case ) {
1281 global $wgGrammarForms;
1282 if ( isset($wgGrammarForms['en'][$case][$word]) ) {
1283 return $wgGrammarForms['en'][$case][$word];
1289 * Plural form transformations, needed for some languages.
1290 * For example, there are 3 plural forms in Russian and Polish,
1291 * depending on "count mod 10". See [[w:Plural]]
1292 * For English it is pretty simple.
1294 * Invoked by putting {{plural:count|wordform1|wordform2}}
1295 * or {{plural:count|wordform1|wordform2|wordform3}}
1297 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
1299 * @param integer $count
1300 * @param string $wordform1
1301 * @param string $wordform2
1302 * @param string $wordform3 (optional)
1303 * @param string $wordform4 (optional)
1304 * @param string $wordform5 (optional)
/**
 * Pick the word form for a count: $w1 when the count loosely equals
 * '1' or '-1', otherwise $w2. The remaining forms are not used here.
 *
 * @param mixed  $count
 * @param string $w1 form returned for 1 and -1
 * @param string $w2 form returned for every other count
 * @param string $w3 unused
 * @param string $w4 unused
 * @param string $w5 unused
 * @return string
 */
function convertPlural( $count, $w1, $w2, $w3, $w4, $w5 ) {
	if ( $count == '1' || $count == '-1' ) {
		return $w1;
	}
	return $w2;
}
1312 * For translating expiry times
1313 * @param string The validated block time in English
1314 * @return Somehow translated block time
1315 * @see LanguageFi.php for example implementation
1317 function translateBlockExpiry( $str ) {
1319 $scBlockExpiryOptions = $this->getMessageFromDB( 'ipboptions' );
1321 if ( $scBlockExpiryOptions == '-') {
1325 foreach (explode(',', $scBlockExpiryOptions) as $option) {
1326 if ( strpos($option, ":") === false )
1328 list($show, $value) = explode(":", $option);
1329 if ( strcmp ( $str, $value) == 0 )
1330 return '<span title="' . htmlspecialchars($str). '">' .
1331 htmlspecialchars( trim( $show ) ) . '</span>';
1338 * languages like Chinese need to be segmented in order for the diff
1341 * @param string $text
1344 function segmentForDiff( $text ) {
1349 * and unsegment to show the result
1351 * @param string $text
1354 function unsegmentForDiff( $text ) {
# Convert text to the different variants of a language; the work is done
# by the converter object held in $this->mConverter.
function convert( $text, $isTitle = false) {
	$converted = $this->mConverter->convert( $text, $isTitle );
	return $converted;
}
# Convert text from within the Parser; forwards the text and the parser
# (by reference) to the converter object.
function parserConvert( $text, &$parser ) {
	$converted = $this->mConverter->parserConvert( $text, $parser );
	return $converted;
}
# Check whether this is a language with variants, i.e. whether
# getVariants() reports more than one entry.
function hasVariants(){
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
# Put custom tags (e.g. -{ }-) around math to prevent conversion;
# the tagging itself is done by the converter object.
function armourMath($text){
	$armoured = $this->mConverter->armourMath( $text );
	return $armoured;
}
/**
 * Perform output conversion on a string, then encode the result for
 * safe HTML output.
 *
 * @param string $text text to convert
 * @param bool $isTitle passed through unchanged to convert()
 * @return string converted text, escaped with htmlspecialchars()
 *
 * @todo this should get integrated somewhere sane
 */
function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
/**
 * Convert a category key by handing it to the converter object.
 *
 * @param string $key
 * @return mixed whatever the converter's convertCategoryKey() returns
 */
function convertCategoryKey( $key ) {
	$converted = $this->mConverter->convertCategoryKey( $key );
	return $converted;
}
/**
 * Get the list of variants supported by this language, as reported by
 * the converter object. See the sample implementation in LanguageZh.php.
 *
 * @return array an array of language codes
 */
function getVariants() {
	$variants = $this->mConverter->getVariants();
	return $variants;
}
/**
 * Get the preferred variant as reported by the converter object.
 *
 * @param bool $fromUser forwarded unchanged to the converter
 * @return mixed whatever the converter's getPreferredVariant() returns
 */
function getPreferredVariant( $fromUser = true ) {
	$variant = $this->mConverter->getPreferredVariant( $fromUser );
	return $variant;
}
/**
 * If a language supports multiple variants, a link that does not exist
 * in one variant may exist in another. This method asks the converter
 * object to look for it. See e.g. LanguageZh.php.
 *
 * @param string $link the name of the link
 * @param mixed $nt the title object of the link
 * @return null the input parameters may be modified upon return
 */
function findVariantLink( &$link, &$nt ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt );
}
/**
 * If a language supports multiple variants, convert text into an array
 * of all possible variants of the text:
 *  'variant' => text in that variant
 *
 * @param string $text
 * @return mixed whatever the converter's convertLinkToAllVariants() returns
 */
function convertLinkToAllVariants($text){
	$allVariants = $this->mConverter->convertLinkToAllVariants( $text );
	return $allVariants;
}
/**
 * Return the language-specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value comes from the
 * converter object.
 *
 * @return mixed whatever the converter's getExtraHashOptions() returns
 */
function getExtraHashOptions() {
	$options = $this->mConverter->getExtraHashOptions();
	return $options;
}
/**
 * For languages that support multiple variants, the title of an article
 * may be displayed differently in different variants. This method returns
 * the appropriate title defined in the body of the article, as reported
 * by the converter object.
 *
 * @return mixed whatever the converter's getParsedTitle() returns
 */
function getParsedTitle() {
	$title = $this->mConverter->getParsedTitle();
	return $title;
}
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser. The tagging is done by the
 * converter object.
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse forwarded unchanged to the converter
 * @return string the tagged text
 */
function markNoConversion( $text, $noParse=false ) {
	$tagged = $this->mConverter->markNoConversion( $text, $noParse );
	return $tagged;
}
1468 * A regular expression to match legal word-trailing characters
1469 * which should be merged onto a link of the form [[foo]]bar.
# Accessor: hands back the value stored in this object's linkTrail property.
1474 function linkTrail() {
1476 return $this->linkTrail
;
1479 function getLangObj() {
/**
 * Get the RFC 3066 code for this language object.
 *
 * @return mixed the value currently stored in $this->mCode
 */
function getCode() {
	$code = $this->mCode;
	return $code;
}
/**
 * Set the language code stored on this object (read back by getCode()).
 *
 * @param string $code
 */
function setCode( $code ) {
	$newCode = $code;
	$this->mCode = $newCode;
}
/**
 * Build a file name from a language code: the code gets its first
 * character upper-cased and every '-' replaced by '_', and is then
 * wrapped in the given prefix and suffix.
 *
 * $prefix has no default: it precedes the required $code, so a default
 * could never take effect in a valid call, and declaring one triggers a
 * deprecation notice on PHP 8.
 *
 * @param string $prefix text placed before the transformed code
 * @param string $code language code, e.g. 'zh-tw'
 * @param string $suffix text placed after the transformed code
 * @return string e.g. 'LanguageZh_tw.php' for ( 'Language', 'zh-tw' )
 */
static function getFileName( $prefix, $code, $suffix = '.php' ) {
	return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
# Builds the Messages file path for a language code by calling
# getFileName() with the prefix "$IP/languages/messages/Messages" and the
# suffix '.php'.
# NOTE(review): $IP is presumably the installation-path global; confirm
# how it is brought into scope here.
1498 static function getMessagesFileName( $code ) {
1500 return self
::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
# Builds the language class file path for a language code by calling
# getFileName() with the prefix "$IP/languages/classes/Language" and the
# suffix '.php'.
# NOTE(review): $IP is presumably the installation-path global; confirm
# how it is brought into scope here.
1503 static function getClassFileName( $code ) {
1505 return self
::getFileName( "$IP/languages/classes/Language", $code, '.php' );
/**
 * Load the localisation data for a language code and return its entry
 * from the static localisation cache.
 *
 * @param string $code language code
 * @param bool $disableCache forwarded unchanged to loadLocalisation()
 * @return array the cache entry for $code
 */
static function getLocalisationArray( $code, $disableCache = false ) {
	self::loadLocalisation( $code, $disableCache );
	$localisation = self::$mLocalisationCache[$code];
	return $localisation;
}
1514 * Load localisation data for a given code into the static cache
1516 * @return array Dependencies, map of filenames to mtimes
# Fills self::$mLocalisationCache[$code]. The sources consulted below are,
# in order: the per-process static cache, a precompiled "Messages<Code>.ser"
# data file, the $wgMemc cache, and finally the Messages source file, whose
# data is merged with that of the fallback language.
1518 static function loadLocalisation( $code, $disableCache = false ) {
# Language codes whose fallback is currently being loaded; used further
# down to detect circular fallback chains.
1519 static $recursionGuard = array();
1523 throw new MWException( "Invalid language code requested" );
1526 if ( !$disableCache ) {
1527 # Try the per-process cache
1528 if ( isset( self
::$mLocalisationCache[$code] ) ) {
1529 return self
::$mLocalisationCache[$code]['deps'];
1532 wfProfileIn( __METHOD__
);
1534 # Try the serialized directory
1535 $cache = wfGetPrecompiledData( self
::getFileName( "Messages", $code, '.ser' ) );
1537 self
::$mLocalisationCache[$code] = $cache;
1538 wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
1539 wfProfileOut( __METHOD__
);
1540 return self
::$mLocalisationCache[$code]['deps'];
1543 # Try the global cache
1544 $memcKey = wfMemcKey('localisation', $code );
1545 $cache = $wgMemc->get( $memcKey );
1547 # Check file modification times
# A dependency is stale when its file no longer exists or has been
# modified after the mtime recorded in the cache entry.
1548 foreach ( $cache['deps'] as $file => $mtime ) {
1549 if ( !file_exists( $file ) ||
filemtime( $file ) > $mtime ) {
1553 if ( self
::isLocalisationOutOfDate( $cache ) ) {
# Stale entries are removed from the shared cache.
1554 $wgMemc->delete( $memcKey );
1556 wfDebug( "Language::loadLocalisation(): localisation cache for $code had expired due to update of $file\n" );
1558 self
::$mLocalisationCache[$code] = $cache;
1559 wfDebug( "Language::loadLocalisation(): got localisation for $code from cache\n" );
1560 wfProfileOut( __METHOD__
);
1561 return $cache['deps'];
1565 wfProfileIn( __METHOD__
);
1568 # Default fallback, may be overridden when the messages file is included
1569 if ( $code != 'en' ) {
1575 # Load the primary localisation from the source file
1576 $filename = self
::getMessagesFileName( $code );
1577 if ( !file_exists( $filename ) ) {
1578 wfDebug( "Language::loadLocalisation(): no localisation file for $code, using implicit fallback to en\n" );
# The source file itself is the first recorded dependency.
1582 $deps = array( $filename => filemtime( $filename ) );
# The messages file is executed in this scope; compact() then gathers the
# variables named in self::$mLocalisationKeys into the cache array.
1583 require( $filename );
1584 $cache = compact( self
::$mLocalisationKeys );
1585 wfDebug( "Language::loadLocalisation(): got localisation for $code from source\n" );
1588 if ( !empty( $fallback ) ) {
1589 # Load the fallback localisation, with a circular reference guard
1590 if ( isset( $recursionGuard[$code] ) ) {
1591 throw new MWException( "Error: Circular fallback reference in language code $code" );
1593 $recursionGuard[$code] = true;
1594 $newDeps = self
::loadLocalisation( $fallback, $disableCache );
1595 unset( $recursionGuard[$code] );
# The recursive call has populated the static cache for the fallback code;
# its dependencies become dependencies of this language as well.
1597 $secondary = self
::$mLocalisationCache[$fallback];
1598 $deps = array_merge( $deps, $newDeps );
1600 # Merge the fallback localisation with the current localisation
1601 foreach ( self
::$mLocalisationKeys as $key ) {
1602 if ( isset( $cache[$key] ) ) {
1603 if ( isset( $secondary[$key] ) ) {
# Map keys: array union keeps this language's entries and only adds the
# fallback entries whose keys are missing.
1604 if ( in_array( $key, self
::$mMergeableMapKeys ) ) {
1605 $cache[$key] = $cache[$key] +
$secondary[$key];
# List keys: fallback entries are passed to array_merge() first, this
# language's entries second.
1606 } elseif ( in_array( $key, self
::$mMergeableListKeys ) ) {
1607 $cache[$key] = array_merge( $secondary[$key], $cache[$key] );
# Alias-list keys: merged recursively, this language's entries first.
1608 } elseif ( in_array( $key, self
::$mMergeableAliasListKeys ) ) {
1609 $cache[$key] = array_merge_recursive( $cache[$key], $secondary[$key] );
# NOTE(review): presumably reached when this language does not define
# $key at all, so the fallback's value is used wholesale - confirm.
1613 $cache[$key] = $secondary[$key];
1617 # Merge bookstore lists if requested
1618 if ( !empty( $cache['bookstoreList']['inherit'] ) ) {
1619 $cache['bookstoreList'] = array_merge( $cache['bookstoreList'], $secondary['bookstoreList'] );
# 'inherit' is only a merge marker and is not kept in the stored list.
1621 if ( isset( $cache['bookstoreList']['inherit'] ) ) {
1622 unset( $cache['bookstoreList']['inherit'] );
1626 # Add dependencies to the cache entry
1627 $cache['deps'] = $deps;
1629 # Replace spaces with underscores in namespace names
1630 $cache['namespaceNames'] = str_replace( ' ', '_', $cache['namespaceNames'] );
1632 # Save to both caches
1633 self
::$mLocalisationCache[$code] = $cache;
1634 if ( !$disableCache ) {
1635 $wgMemc->set( $memcKey, $cache );
1638 wfProfileOut( __METHOD__
);
1643 * Test if a given localisation cache is out of date with respect to the
1644 * source Messages files. This is done automatically for the global cache
1645 * in $wgMemc, but is only done on certain occasions for the serialized
1648 * @param $cache mixed Either a language code or a cache array
# Compares the dependency files recorded in a localisation cache entry
# against the files currently on disk.
1650 static function isLocalisationOutOfDate( $cache ) {
# A non-array argument is treated as a language code: its localisation is
# loaded and the resulting static cache entry is inspected instead.
1651 if ( !is_array( $cache ) ) {
1652 self
::loadLocalisation( $cache );
1653 $cache = self
::$mLocalisationCache[$cache];
# 'deps' maps file names to the modification times recorded when the entry
# was built; a missing file or a newer mtime satisfies the condition below.
1656 foreach ( $cache['deps'] as $file => $mtime ) {
1657 if ( !file_exists( $file ) ||
filemtime( $file ) > $mtime ) {
/**
 * Get the fallback for a given language: loads the localisation for
 * $code and returns the 'fallback' item of its cache entry.
 *
 * @param string $code language code
 * @return mixed the 'fallback' value stored for $code
 */
static function getFallbackFor( $code ) {
	self::loadLocalisation( $code );
	$entry = self::$mLocalisationCache[$code];
	return $entry['fallback'];
}
/**
 * Get all messages for a given language: loads the localisation for
 * $code and returns the 'messages' item of its cache entry.
 *
 * @param string $code language code
 * @return mixed the 'messages' value stored for $code
 */
static function getMessagesFor( $code ) {
	self::loadLocalisation( $code );
	$entry = self::$mLocalisationCache[$code];
	return $entry['messages'];
}
/**
 * Get a message for a given language.
 *
 * @param string $key message key
 * @param string $code language code
 * @return mixed the message stored under $key for $code, or null when
 *   no such message is set
 */
static function getMessageFor( $key, $code ) {
	self::loadLocalisation( $code );
	if ( isset( self::$mLocalisationCache[$code]['messages'][$key] ) ) {
		return self::$mLocalisationCache[$code]['messages'][$key];
	}
	return null;
}
1690 * Load localisation data for this object
# The mLoaded flag guards the copy below so it happens once per object.
1693 if ( !$this->mLoaded
) {
1694 self
::loadLocalisation( $this->getCode() );
# Each item named in self::$mLocalisationKeys is copied from the static
# cache entry onto a same-named property of this object.
1695 $cache =& self
::$mLocalisationCache[$this->getCode()];
1696 foreach ( self
::$mLocalisationKeys as $key ) {
1697 $this->$key = $cache[$key];
1699 $this->mLoaded
= true;
# Apply the site-configuration adjustments to the freshly copied data.
1701 $this->fixUpSettings();
1706 * Do any necessary post-cache-load settings adjustment
# Adjusts this object's localisation data using site configuration globals:
# namespace names, the lower-cased name-to-index lookup table, and the
# default date format.
1708 function fixUpSettings() {
1709 global $wgExtraNamespaces, $wgMetaNamespace, $wgMetaNamespaceTalk,
1710 $wgNamespaceAliases, $wgAmericanDates;
1711 wfProfileIn( __METHOD__
);
# Array union: for an index present on both sides, the entry from
# $wgExtraNamespaces is the one kept.
1712 if ( $wgExtraNamespaces ) {
1713 $this->namespaceNames
= $wgExtraNamespaces +
$this->namespaceNames
;
# The project namespace is always named after $wgMetaNamespace.
1716 $this->namespaceNames
[NS_PROJECT
] = $wgMetaNamespace;
# An explicitly configured project-talk name is used as given.
1717 if ( $wgMetaNamespaceTalk ) {
1718 $this->namespaceNames
[NS_PROJECT_TALK
] = $wgMetaNamespaceTalk;
# The localised project-talk name may contain a '$1' placeholder, which is
# replaced with the project namespace name.
1720 $talk = $this->namespaceNames
[NS_PROJECT_TALK
];
1721 $talk = str_replace( '$1', $wgMetaNamespace, $talk );
1723 # Allow grammar transformations
1724 # Allowing full message-style parsing would make simple requests
1725 # such as action=raw much more expensive than they need to be.
1726 # This will hopefully cover most cases.
1727 $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
1728 array( &$this, 'replaceGrammarInNamespace' ), $talk );
1729 $talk = str_replace( ' ', '_', $talk );
1730 $this->namespaceNames
[NS_PROJECT_TALK
] = $talk;
1733 # The above mixing may leave namespaces out of canonical order.
1734 # Re-order by namespace ID number...
1735 ksort( $this->namespaceNames
);
1737 # Put namespace names and aliases into a hashtable.
1738 # If this is too slow, then we should arrange it so that it is done
1739 # before caching. The catch is that at pre-cache time, the above
1740 # class-specific fixup hasn't been done.
# Keys are lower-cased names. The three loops below write to the same
# table in sequence, so on a name collision a later loop overwrites the
# index stored by an earlier one.
1741 $this->mNamespaceIds
= array();
1742 foreach ( $this->namespaceNames
as $index => $name ) {
1743 $this->mNamespaceIds
[$this->lc($name)] = $index;
1745 if ( $this->namespaceAliases
) {
1746 foreach ( $this->namespaceAliases
as $name => $index ) {
1747 $this->mNamespaceIds
[$this->lc($name)] = $index;
1750 if ( $wgNamespaceAliases ) {
1751 foreach ( $wgNamespaceAliases as $name => $index ) {
1752 $this->mNamespaceIds
[$this->lc($name)] = $index;
# The placeholder format 'dmy or mdy' is resolved by $wgAmericanDates.
1756 if ( $this->defaultDateFormat
== 'dmy or mdy' ) {
1757 $this->defaultDateFormat
= $wgAmericanDates ?
'mdy' : 'dmy';
1759 wfProfileOut( __METHOD__
);
/**
 * Callback for the {{grammar:case|word}} pattern matched in
 * fixUpSettings(): applies convertGrammar() to the matched pair.
 *
 * @param array $m regex match; $m[1] is the case, $m[2] is the word
 * @return mixed result of convertGrammar() for the trimmed word and case
 */
function replaceGrammarInNamespace( $m ) {
	$word = trim( $m[2] );
	$case = trim( $m[1] );
	return $this->convertGrammar( $word, $case );
}
1766 static function getCaseMaps() {
1767 static $wikiUpperChars, $wikiLowerChars;
1768 if ( isset( $wikiUpperChars ) ) {
1769 return array( $wikiUpperChars, $wikiLowerChars );
1772 wfProfileIn( __METHOD__
);
1773 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
1774 if ( $arr === false ) {
1775 throw new MWException(
1776 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
1779 wfProfileOut( __METHOD__
);
1780 return array( $wikiUpperChars, $wikiLowerChars );