* (bug 6254) Update to Indonesian translation (id) #20
[lhc/web/wiklou.git] / languages / Language.php
index eacf153..7779a34 100644 (file)
@@ -226,28 +226,28 @@ $wgLanguageNamesEn =& $wgLanguageNames;
        MAG_CURRENTDAYNAME       => array( 1,    'CURRENTDAYNAME'         ),
        MAG_CURRENTYEAR          => array( 1,    'CURRENTYEAR'            ),
        MAG_CURRENTTIME          => array( 1,    'CURRENTTIME'            ),
-       MAG_NUMBEROFPAGES                => array( 1,    'NUMBEROFPAGES'                  ),
+       MAG_NUMBEROFPAGES        => array( 1,    'NUMBEROFPAGES'          ),
        MAG_NUMBEROFARTICLES     => array( 1,    'NUMBEROFARTICLES'       ),
        MAG_NUMBEROFFILES        => array( 1,    'NUMBEROFFILES'          ),
-       MAG_NUMBEROFUSERS                => array( 1,    'NUMBEROFUSERS'                  ),
+       MAG_NUMBEROFUSERS        => array( 1,    'NUMBEROFUSERS'          ),
        MAG_PAGENAME             => array( 1,    'PAGENAME'               ),
        MAG_PAGENAMEE            => array( 1,    'PAGENAMEE'              ),
        MAG_NAMESPACE            => array( 1,    'NAMESPACE'              ),
        MAG_NAMESPACEE           => array( 1,    'NAMESPACEE'             ),
-       MAG_TALKSPACE                    => array( 1,    'TALKSPACE'                      ),
-       MAG_TALKSPACEE                   => array( 1,    'TALKSPACEE'                     ),
-       MAG_SUBJECTSPACE                 => array( 1,    'SUBJECTSPACE', 'ARTICLESPACE' ),
-       MAG_SUBJECTSPACEE                => array( 1,    'SUBJECTSPACEE', 'ARTICLESPACEE' ),
+       MAG_TALKSPACE            => array( 1,    'TALKSPACE'              ),
+       MAG_TALKSPACEE           => array( 1,    'TALKSPACEE'              ),
+       MAG_SUBJECTSPACE         => array( 1,    'SUBJECTSPACE', 'ARTICLESPACE' ),
+       MAG_SUBJECTSPACEE        => array( 1,    'SUBJECTSPACEE', 'ARTICLESPACEE' ),
        MAG_FULLPAGENAME         => array( 1,    'FULLPAGENAME'           ),
        MAG_FULLPAGENAMEE        => array( 1,    'FULLPAGENAMEE'          ),
-       MAG_SUBPAGENAME          => array( 1,    'SUBPAGENAME'                    ),
-       MAG_SUBPAGENAMEE                 => array( 1,    'SUBPAGENAMEE'                   ),
-       MAG_BASEPAGENAME                 => array( 1,    'BASEPAGENAME'                   ),
-       MAG_BASEPAGENAMEE                => array( 1,    'BASEPAGENAMEE'                  ),
-       MAG_TALKPAGENAME                 => array( 1,    'TALKPAGENAME'                   ),
-       MAG_TALKPAGENAMEE                => array( 1,    'TALKPAGENAMEE'                  ),
-       MAG_SUBJECTPAGENAME              => array( 1,    'SUBJECTPAGENAME', 'ARTICLEPAGENAME' ),
-       MAG_SUBJECTPAGENAMEE     => array( 1,    'SUBJECTPAGENAMEE', 'ARTICLEPAGENAMEE' ),
+       MAG_SUBPAGENAME          => array( 1,    'SUBPAGENAME'            ),
+       MAG_SUBPAGENAMEE         => array( 1,    'SUBPAGENAMEE'           ),
+       MAG_BASEPAGENAME         => array( 1,    'BASEPAGENAME'           ),
+       MAG_BASEPAGENAMEE        => array( 1,    'BASEPAGENAMEE'          ),
+       MAG_TALKPAGENAME         => array( 1,    'TALKPAGENAME'           ),
+       MAG_TALKPAGENAMEE        => array( 1,    'TALKPAGENAMEE'          ),
+       MAG_SUBJECTPAGENAME      => array( 1,    'SUBJECTPAGENAME', 'ARTICLEPAGENAME' ),
+       MAG_SUBJECTPAGENAMEE     => array( 1,    'SUBJECTPAGENAMEE', 'ARTICLEPAGENAMEE' ),
        MAG_MSG                  => array( 0,    'MSG:'                   ),
        MAG_SUBST                => array( 0,    'SUBST:'                 ),
        MAG_MSGNW                => array( 0,    'MSGNW:'                 ),
@@ -283,11 +283,14 @@ $wgLanguageNamesEn =& $wgLanguageNames;
        MAG_UC                   => array( 0,    'UC:'                    ),
        MAG_RAW                  => array( 0,    'RAW:'                   ),
        MAG_DISPLAYTITLE         => array( 1,    'DISPLAYTITLE'           ),
-       MAG_RAWSUFFIX                    => array( 1,    'R'                                      ),
-       MAG_NEWSECTIONLINK               => array( 1,    '__NEWSECTIONLINK__'     ),
-       MAG_CURRENTVERSION               => array( 1,    'CURRENTVERSION'                 ),
-       MAG_URLENCODE                    => array( 0,    'URLENCODE:'                     ),
-       MAG_CURRENTTIMESTAMP     => array( 1,    'CURRENTTIMESTAMP'               ),
+       MAG_RAWSUFFIX            => array( 1,    'R'                      ),
+       MAG_NEWSECTIONLINK       => array( 1,    '__NEWSECTIONLINK__'     ),
+       MAG_CURRENTVERSION       => array( 1,    'CURRENTVERSION'         ),
+       MAG_URLENCODE            => array( 0,    'URLENCODE:'             ),
+       MAG_CURRENTTIMESTAMP     => array( 1,    'CURRENTTIMESTAMP'       ),
+       MAG_DIRECTIONMARK        => array( 1,    'DIRECTIONMARK', 'DIRMARK' ),
+       MAG_LANGUAGE                     => array( 0,    '#LANGUAGE:' ),
+       MAG_CONTENTLANGUAGE              => array( 1,    'CONTENTLANGUAGE', 'CONTENTLANG' ),
 );
 
 if (!$wgCachedMessageArrays) {
@@ -317,24 +320,6 @@ class fakeConverter {
 class Language {
        var $mConverter;
        function Language() {
-
-               # Copies any missing values in the specified arrays from En to the current language
-               $fillin = array( 'wgSysopSpecialPages', 'wgValidSpecialPages', 'wgDeveloperSpecialPages' );
-               $name = get_class( $this );
-
-               if( strpos( $name, 'language' ) == 0){
-                       $lang = ucfirst( substr( $name, 8 ) );
-                       foreach( $fillin as $arrname ){
-                               $langver = "{$arrname}{$lang}";
-                               $enver = "{$arrname}En";
-                               if( ! isset( $GLOBALS[$langver] ) || ! isset( $GLOBALS[$enver] ))
-                                       continue;
-                               foreach($GLOBALS[$enver] as $spage => $text){
-                                       if( ! isset( $GLOBALS[$langver][$spage] ) )
-                                               $GLOBALS[$langver][$spage] = $text;
-                               }
-                       }
-               }
                $this->mConverter = new fakeConverter($this);
        }
 
@@ -747,41 +732,73 @@ class Language {
                return iconv( $in, $out, $string );
        }
 
-       function ucfirst( $string ) {
-               # For most languages, this is a wrapper for ucfirst()
-               return ucfirst( $string );
-       }
-
-       function uc( $str ) {
-               return strtoupper( $str );
-       }
-
-       function lcfirst( $s ) {
-               return strtolower( $s{0} ). substr( $s, 1 );
+       function ucfirst( $str ) {
+               return $this->uc( $str, true );
        }
 
-       function lc( $str ) {
-               return strtolower( $str );
+       function uc( $str, $first = false ) {
+               if ( function_exists( 'mb_strtoupper' ) )
+                       if ( $first )
+                               if ( $this->isMultibyte( $str ) )
+                                       return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+                               else
+                                       return ucfirst( $str );
+                       else
+                               return $this->isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
+               else
+                       if ( $this->isMultibyte( $str ) ) {
+                               global $wikiUpperChars;
+                               $x = $first ? '^' : '';
+                               return preg_replace(
+                                       "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                                       "strtr( \"\$1\" , \$wikiUpperChars )",
+                                       $str
+                               );
+                       } else
+                               return $first ? ucfirst( $str ) : strtoupper( $str );
+       }
+
+       function lcfirst( $str ) {
+               return $this->lc( $str, true );
+       }
+
+       function lc( $str, $first = false ) {
+               if ( function_exists( 'mb_strtolower' ) )
+                       if ( $first )
+                               if ( $this->isMultibyte( $str ) )
+                                       return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+                               else
+                                       return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
+                       else
+                               return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
+               else
+                       if ( $this->isMultibyte( $str ) ) {
+                               global $wikiLowerChars;
+                               $x = $first ? '^' : '';
+                               return preg_replace(
+                                       "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                                       "strtr( \"\$1\" , \$wikiLowerChars )",
+                                       $str
+                               );
+                       } else
+                               return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
        }
 
        function checkTitleEncoding( $s ) {
                global $wgInputEncoding;
 
-               # Check for UTF-8 URLs; Internet Explorer produces these if you
-               # type non-ASCII chars in the URL bar or follow unescaped links.
+               if( is_array( $s ) ) {
+                       wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
+               }
+               # Check for non-UTF-8 URLs
                $ishigh = preg_match( '/[\x80-\xff]/', $s);
-               $isutf = ($ishigh ? preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
-                        '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s ) : true );
-
-               if( ($wgInputEncoding != 'utf-8') and $ishigh and $isutf )
-                       return @iconv( 'UTF-8', $wgInputEncoding, $s );
+               if(!$ishigh) return $s;
 
-               if( ($wgInputEncoding == 'utf-8') and $ishigh and !$isutf )
-                       return utf8_encode( $s );
+               $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
+                '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
+               if( $isutf8 ) return $s;
 
-               # Other languages can safely leave this function, or replace
-               # it with one to detect and convert another legacy encoding.
-               return $s;
+               return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
        }
 
        /**
@@ -789,11 +806,33 @@ class Language {
         * or characters which need to be converted for MySQL's
         * indexing to grok it correctly. Make such changes here.
         *
-        * @param string $in
+        * @param string $string
         * @return string
         */
-       function stripForSearch( $in ) {
-               return strtolower( $in );
+       function stripForSearch( $string ) {
+               # MySQL fulltext index doesn't grok utf-8, so we
+               # need to fold cases and convert to hex
+
+               # In Language:: it just returns the lowercased string. Maybe
+               # every strtolower() on the stripped output or on the argument
+               # should be removed, and all stripForSearch() methods
+               # adjusted accordingly.
+
+               wfProfileIn( "Language::stripForSearch" );
+               if( function_exists( 'mb_strtolower' ) ) {
+                       $out = preg_replace(
+                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                               "'U8' . bin2hex( \"$1\" )",
+                               mb_strtolower( $string ) );
+               } else {
+                       global $wikiLowerChars;
+                       $out = preg_replace(
+                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                               "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
+                               $string );
+               }
+               wfProfileOut( "Language::stripForSearch" );
+               return $out;
        }
 
        function convertForSearchResult( $termsArray ) {
@@ -811,7 +850,10 @@ class Language {
         * @return string
         */
        function firstChar( $s ) {
-               return $s[0];
+               preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
+               '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
+
+               return isset( $matches[1] ) ? $matches[1] : "";
        }
 
        function initEncoding() {
@@ -996,7 +1038,7 @@ class Language {
        #
        # $length does not include the optional ellipsis.
        # If $length is negative, snip from the beginning
-       function truncate( $string, $length, $ellipsis = '' ) {
+       function truncate( $string, $length, $ellipsis = "" ) {
                if( $length == 0 ) {
                        return $ellipsis;
                }
@@ -1005,9 +1047,24 @@ class Language {
                }
                if( $length > 0 ) {
                        $string = substr( $string, 0, $length );
+                       $char = ord( $string[strlen( $string ) - 1] );
+                       if ($char >= 0xc0) {
+                               # We got the first byte only of a multibyte char; remove it.
+                               $string = substr( $string, 0, -1 );
+                       } elseif( $char >= 0x80 &&
+                                 preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
+                                             '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
+                           # We chopped in the middle of a character; remove it
+                               $string = $m[1];
+                       }
                        return $string . $ellipsis;
                } else {
                        $string = substr( $string, $length );
+                       $char = ord( $string[0] );
+                       if( $char >= 0x80 && $char < 0xc0 ) {
+                               # We chopped in the middle of a character; remove the whole thing
+                               $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
+                       }
                        return $ellipsis . $string;
                }
        }
@@ -1022,8 +1079,8 @@ class Language {
         */
        function convertGrammar( $word, $case ) {
                global $wgGrammarForms;
-               if ( isset($wgGrammarForms[$case][$word]) ) {
-                       return $wgGrammarForms[$case][$word];
+               if ( isset($wgGrammarForms['en'][$case][$word]) ) {
+                       return $wgGrammarForms['en'][$case][$word];
                }
                return $word;
        }
@@ -1205,12 +1262,33 @@ class Language {
                return str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
        }
 
+       function isMultibyte( $str ) { # true when $str contains any byte >= 0x80 (non-ASCII), not just a leading one
+               return (bool)preg_match( '/[\x80-\xff]/', $str );
+       }
 
+       function fallback8bitEncoding() {
+               # Windows codepage 1252 is a superset of ISO 8859-1;
+               # override this to use a different source encoding to
+               # translate incoming 8-bit URLs.
+               return "windows-1252";
+       }
 }
 
-# FIXME: Merge all UTF-8 support code into Language base class.
-# We no longer support Latin-1 charset.
-require_once( 'LanguageUtf8.php' );
+if( function_exists( 'mb_strtoupper' ) ) {
+       mb_internal_encoding('UTF-8');
+} else {
+       # Hack our own case conversion routines
+
+       # Loading serialized arrays is faster than parsing code :P
+       $wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
+       $wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
+
+       if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
+               require_once( "includes/Utf8Case.php" );
+               $wgMemc->set( $key1, $wikiUpperChars );
+               $wgMemc->set( $key2, $wikiLowerChars );
+       }
+}
 
 # This should fail gracefully if there's not a localization available
 wfSuppressWarnings();