Fleshed out file
[lhc/web/wiklou.git] / languages / LanguageConverter.php
index 23d532c..223336a 100644 (file)
@@ -1,11 +1,24 @@
 <?php
-
 /**
  * Contains the LanguageConverter class and ConverterRule class
- * @ingroup Language
  *
- * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
  * @file
+ * @ingroup Language
  */
 
 /**
  * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>, PhiLiP <philip.npc@gmail.com>
  */
 class LanguageConverter {
-       var $mPreferredVariant = ''; // The User's preferred variant
        var $mMainLanguageCode;
        var $mVariants, $mVariantFallbacks, $mVariantNames;
        var $mTablesLoaded = false;
        var $mTables;
-       var $mNamespaceTables;
        // 'bidirectional' 'unidirectional' 'disable' for each variant
        var $mManualLevel;
+
+       /**
+        * @var String: memcached key name
+        */
        var $mCacheKey;
+
        var $mLangObj;
-       var $mMarkup;
        var $mFlags;
        var $mDescCodeSep = ':', $mDescVarSep = ';';
        var $mUcfirst = false;
-       var $mHeaderVariant;
        var $mConvRuleTitle = false;
+       var $mURLVariant;
+       var $mUserVariant;
+       var $mHeaderVariant;
+       var $mMaxDepth = 10;
+       var $mVarSeparatorPattern;
 
        const CACHE_VERSION_KEY = 'VERSION 6';
 
        /**
         * Constructor
         *
-        * @param $langobj The Language Object
-        * @param string $maincode the main language code of this language
-        * @param array $variants the supported variants of this language
-        * @param array $variantfallback the fallback language of each variant
-        * @param array $markup array defining the markup used for manual conversion
-        * @param array $flags array defining the custom strings that maps to the
-        *              flags
-        * @param array $manualLevel limit for supported variants
-        * @public
+        * @param $langobj Language: the Language Object
+        * @param $maincode String: the main language code of this language
+        * @param $variants Array: the supported variants of this language
+        * @param $variantfallbacks Array: the fallback language of each variant
+        * @param $flags Array: defining the custom strings that maps to the flags
+        * @param $manualLevel Array: limit for supported variants
         */
-       function __construct( $langobj, $maincode,
-                                                               $variants = array(),
-                                                               $variantfallbacks = array(),
-                                                               $markup = array(),
-                                                               $flags = array(),
+       public function __construct( $langobj, $maincode, $variants = array(),
+                                                               $variantfallbacks = array(), $flags = array(),
                                                                $manualLevel = array() ) {
+               global $wgDisabledVariants;
                $this->mLangObj = $langobj;
                $this->mMainLanguageCode = $maincode;
-
-               global $wgDisabledVariants;
-               $this->mVariants = array();
-               foreach ( $variants as $variant ) {
-                       if ( !in_array( $variant, $wgDisabledVariants ) ) {
-                               $this->mVariants[] = $variant;
-                       }
-               }
+               $this->mVariants = array_diff( $variants, $wgDisabledVariants );
                $this->mVariantFallbacks = $variantfallbacks;
-               global $wgLanguageNames;
-               $this->mVariantNames = $wgLanguageNames;
+               $this->mVariantNames = Language::getLanguageNames();
                $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
-               $m = array(
-                       'begin' => '-{',
-                       'flagsep' => '|',
-                       'unidsep' => '=>', // for unidirectional conversion
-                       'codesep' => ':',
-                       'varsep' => ';',
-                       'end' => '}-'
-               );
-               $this->mMarkup = array_merge( $m, $markup );
-               $f = array(
+               $defaultflags = array(
                        // 'S' show converted text
                        // '+' add rules for alltext
                        // 'E' the gave flags is error
@@ -88,25 +85,27 @@ class LanguageConverter {
                        'D' => 'D',       // convert description (subclass implement)
                        '-' => '-',       // remove convert (not implement)
                        'H' => 'H',       // add rule for convert code
-                                     // (but no display in placed code )
+                                                 // (but no display in placed code)
                        'N' => 'N'        // current variant name
                );
-               $this->mFlags = array_merge( $f, $flags );
+               $this->mFlags = array_merge( $defaultflags, $flags );
                foreach ( $this->mVariants as $v ) {
                        if ( array_key_exists( $v, $manualLevel ) ) {
                                $this->mManualLevel[$v] = $manualLevel[$v];
                        } else {
                                $this->mManualLevel[$v] = 'bidirectional';
                        }
-                       $this->mNamespaceTables[$v] = array();
                        $this->mFlags[$v] = $v;
                }
        }
 
        /**
-        * @public
+        * Get all valid variants.
+        * Call this instead of using $this->mVariants directly.
+        *
+        * @return Array: contains all valid variants
         */
-       function getVariants() {
+       public function getVariants() {
                return $this->mVariants;
        }
 
@@ -117,82 +116,143 @@ class LanguageConverter {
         * when zh-sg is preferred but not defined, we will pick zh-hans
         * in this case. Right now this is only used by zh.
         *
-        * @param string $v The language code of the variant
-        * @return string array The code of the fallback language or false if there
-        *                      is no fallback
-        * @public
+        * @param $variant String: the language code of the variant
+        * @return String: The code of the fallback language or the
+        *                               main code if there is no fallback
         */
-       function getVariantFallbacks( $v ) {
-               if ( isset( $this->mVariantFallbacks[$v] ) ) {
-                       return $this->mVariantFallbacks[$v];
+       public function getVariantFallbacks( $variant ) {
+               if ( isset( $this->mVariantFallbacks[$variant] ) ) {
+                       return $this->mVariantFallbacks[$variant];
                }
                return $this->mMainLanguageCode;
        }
 
        /**
-        * Get preferred language variants.
-        * @param boolean $fromUser Get it from $wgUser's preferences
-        * @param boolean $fromHeader Get it from Accept-Language
-        * @return string the preferred language code
-        * @public
+        * Get the title produced by the conversion rule.
+        * @return String: The converted title text
         */
-       function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
-               global $wgUser, $wgRequest, $wgVariantArticlePath,
-                       $wgDefaultLanguageVariant, $wgOut;
+       public function getConvRuleTitle() {
+               return $this->mConvRuleTitle;
+       }
 
-               // see if the preference is set in the request
-               $req = $wgRequest->getText( 'variant' );
+       /**
+        * Get preferred language variant.
+        * @return String: the preferred language code
+        */
+       public function getPreferredVariant() {
+               global $wgDefaultLanguageVariant, $wgUser;
 
-               if ( !$req ) {
-                       $req = $wgRequest->getVal( 'uselang' );
-               }
+               $req = $this->getURLVariant();
 
-               if ( $fromUser && !$req ) {
+               if ( $wgUser->isLoggedIn() && !$req ) {
                        $req = $this->getUserVariant();
+               } elseif ( !$req ) {
+                       $req = $this->getHeaderVariant();
                }
 
-               if ( $fromHeader && !$req ) {
-                       $req = $this->getHeaderVariant();
+               if ( $wgDefaultLanguageVariant && !$req ) {
+                       $req = $this->validateVariant( $wgDefaultLanguageVariant );
                }
 
+               // This function, unlike the other get*Variant functions, is
+               // not memoized (i.e. their return value is not cached) since
+               // new information might appear during processing after this
+               // is first called.
+               if ( $req ) {
+                       return $req;
+               }
+               return $this->mMainLanguageCode;
+       }
+
+       /**
+        * Get default variant.
+        * This function is not affected by the user's settings or headers
+        * @return String: the default variant code
+        */
+       public function getDefaultVariant() {
+               global $wgDefaultLanguageVariant;
+
+               $req = $this->getURLVariant();
+
                if ( $wgDefaultLanguageVariant && !$req ) {
-                       $req = $wgDefaultLanguageVariant;
+                       $req = $this->validateVariant( $wgDefaultLanguageVariant );
                }
 
-               if ( in_array( $req, $this->mVariants ) ) {
+               if ( $req ) {
                        return $req;
                }
                return $this->mMainLanguageCode;
        }
 
        /**
-        * Determine the user has a variant set.
+        * Validate the variant
+        * @param $variant String: the variant to validate
+        * @return Mixed: returns the variant if it is valid, null otherwise
+        */
+       protected function validateVariant( $variant = null ) {
+               if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
+                       return $variant;
+               }
+               return null;
+       }
+
+       /**
+        * Get the variant specified in the URL
         *
-        * @returns mixed variant if one found, false otherwise.
+        * @return Mixed: variant if one found, null otherwise.
         */
-       function getUserVariant() {
+       public function getURLVariant() {
+               global $wgRequest;
+
+               if ( $this->mURLVariant ) {
+                       return $this->mURLVariant;
+               }
+
+               // see if the preference is set in the request
+               $ret = $wgRequest->getText( 'variant' );
+
+               if ( !$ret ) {
+                       $ret = $wgRequest->getVal( 'uselang' );
+               }
+
+               return $this->mURLVariant = $this->validateVariant( $ret );
+       }
+
+       /**
+        * Determine if the user has a variant set.
+        *
+        * @return Mixed: variant if one found, null otherwise.
+        */
+       protected function getUserVariant() {
                global $wgUser;
 
-               // get language variant preference from logged in users
+               // memoizing this function wreaks havoc on parserTest.php
+               /*
+               if ( $this->mUserVariant ) {
+                       return $this->mUserVariant;
+               }
+               */
+
+               // Get language variant preference from logged in users
                // Don't call this on stub objects because that causes infinite
                // recursion during initialisation
                if ( $wgUser->isLoggedIn() )  {
-                       return $wgUser->getOption( 'variant' );
-               }
-               else {
+                       $ret = $wgUser->getOption( 'variant' );
+               } else {
                        // figure out user lang without constructing wgLang to avoid
                        // infinite recursion
-                       return $wgUser->getOption( 'language' );
+                       $ret = $wgUser->getOption( 'language' );
                }
-       }
 
+               return $this->mUserVariant = $this->validateVariant( $ret );
+       }
 
        /**
         * Determine the language variant from the Accept-Language header.
         *
-        * @returns mixed variant if one found, false otherwise.
+        * @return Mixed: variant if one found, null otherwise.
         */
-       function getHeaderVariant() {
+       protected function getHeaderVariant() {
                global $wgRequest;
 
                if ( $this->mHeaderVariant ) {
@@ -200,110 +260,76 @@ class LanguageConverter {
                }
 
                // see if some supported language variant is set in the
-               // http header, but we don't set the mPreferredVariant
-               // variable in case this is called before the user's
-               // preference is loaded
-
-               $acceptLanguage = $wgRequest->getHeader( 'Accept-Language' );
-               if ( !$acceptLanguage ) {
-                       return false;
+               // HTTP header.
+               $languages = array_keys( $wgRequest->getAcceptLang() );
+               if ( empty( $languages ) ) {
+                       return null;
                }
 
-               // explode by comma
-               $result = explode( ',', strtolower( $acceptLanguage ) );
-
-               $languages = array();
-
-               foreach ( $result as $elem ) {
-                       // if $elem likes 'zh-cn;q=0.9'
-                       if ( ( $posi = strpos( $elem, ';' ) ) !== false ) {
-                               // get the real language code likes 'zh-cn'
-                               $languages[] = substr( $elem, 0, $posi );
-                       } else {
-                               $languages[] = $elem;
+               $fallbackLanguages = array();
+               foreach ( $languages as $language ) {
+                       $this->mHeaderVariant = $this->validateVariant( $language );
+                       if ( $this->mHeaderVariant ) {
+                               break;
                        }
-               }
 
-               $fallback_languages = array();
-               foreach ( $languages as $language ) {
-                       // strip whitespace
-                       $language = trim( $language );
-                       if ( in_array( $language, $this->mVariants ) ) {
-                               $this->mHeaderVariant = $language;
-                               return $language;
-                       } else {
-                               // To see if there are fallbacks of current language.
-                               // We record these fallback variants, and process
-                               // them later.
-                               $fallbacks = $this->getVariantFallbacks( $language );
-                               if ( is_string( $fallbacks ) ) {
-                                       $fallback_languages[] = $fallbacks;
-                               } elseif ( is_array( $fallbacks ) ) {
-                                       $fallback_languages =
-                                               array_merge( $fallback_languages,
-                                                                        $fallbacks );
-                               }
+                       // To see if there are fallbacks of current language.
+                       // We record these fallback variants, and process
+                       // them later.
+                       $fallbacks = $this->getVariantFallbacks( $language );
+                       if ( is_string( $fallbacks ) ) {
+                               $fallbackLanguages[] = $fallbacks;
+                       } elseif ( is_array( $fallbacks ) ) {
+                               $fallbackLanguages =
+                                       array_merge( $fallbackLanguages, $fallbacks );
                        }
                }
 
-               // process fallback languages now
-               $fallback_languages = array_unique( $fallback_languages );
-               foreach ( $fallback_languages as $language ) {
-                       if ( in_array( $language, $this->mVariants ) ) {
-                               $this->mHeaderVariant = $language;
-                               return $language;
+               if ( !$this->mHeaderVariant ) {
+                       // process fallback languages now
+                       $fallback_languages = array_unique( $fallbackLanguages );
+                       foreach ( $fallback_languages as $language ) {
+                               $this->mHeaderVariant = $this->validateVariant( $language );
+                               if ( $this->mHeaderVariant ) {
+                                       break;
+                               }
                        }
                }
-       }
 
-       /**
-        * Caption convert, base on preg_replace_callback.
-        *
-        * To convert text in "title" or "alt", like '<img alt="text" ... '
-        * or '<span title="text" ... '
-        *
-        * @return string like ' alt="yyyy"' or ' title="yyyy"'
-        * @private
-        */
-       function captionConvert( $matches ) {
-               $toVariant = $this->getPreferredVariant();
-               $title = $matches[1];
-               $text  = $matches[2];
-               // we convert captions except URL
-               if ( !strpos( $text, '://' ) ) {
-                       $text = $this->translate( $text, $toVariant );
-               }
-               return " $title=\"$text\"";
+               return $this->mHeaderVariant;
        }
 
        /**
         * Dictionary-based conversion.
+        * This function does not parse the conversion rules.
+        * If you want to parse rules, try to use convert() or
+        * convertTo().
         *
-        * @param string $text the text to be converted
-        * @param string $toVariant the target language code
-        * @return string the converted text
-        * @private
+        * @param $text String: the text to be converted
+        * @param $toVariant String: the target language code
+        * @return String: the converted text
         */
-       function autoConvert( $text, $toVariant = false ) {
-               $fname = 'LanguageConverter::autoConvert';
-
-               wfProfileIn( $fname );
+       public function autoConvert( $text, $toVariant = false ) {
+               wfProfileIn( __METHOD__ );
 
-               if ( !$this->mTablesLoaded ) {
-                       $this->loadTables();
-               }
+               $this->loadTables();
 
                if ( !$toVariant ) {
                        $toVariant = $this->getPreferredVariant();
+                       if ( !$toVariant ) {
+                               wfProfileOut( __METHOD__ );
+                               return $text;
+                       }
                }
-               if ( !in_array( $toVariant, $this->mVariants ) ) {
+
+               if( $this->guessVariant( $text, $toVariant ) ) {
                        return $text;
                }
 
                /* we convert everything except:
-                  1. html markups (anything between < and >)
-                  2. html entities
-                  3. place holders created by the parser
+                  1. HTML markups (anything between < and >)
+                  2. HTML entities
+                  3. placeholders created by the parser
                */
                global $wgParser;
                if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
@@ -312,7 +338,7 @@ class LanguageConverter {
                        $marker = '';
                }
 
-               // this one is needed when the text is inside an html markup
+               // this one is needed when the text is inside an HTML markup
                $htmlfix = '|<[^>]+$|^[^<>]*>';
 
                // disable convert to variants between <code></code> tags
@@ -324,61 +350,94 @@ class LanguageConverter {
 
                $reg = '/' . $codefix . $scriptfix . $prefix .
                        '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
+               $startPos = 0;
+               $sourceBlob = '';
+               $literalBlob = '';
+
+               // Guard against delimiter nulls in the input
+               $text = str_replace( "\000", '', $text );
+
+               $markupMatches = null;
+               $elementMatches = null;
+               while ( $startPos < strlen( $text ) ) {
+                       if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
+                               $elementPos = $markupMatches[0][1];
+                               $element = $markupMatches[0][0];
+                       } else {
+                               $elementPos = strlen( $text );
+                               $element = '';
+                       }
 
-               $matches = preg_split( $reg, $text, - 1, PREG_SPLIT_OFFSET_CAPTURE );
-
-               $m = array_shift( $matches );
-
-               $ret = $this->translate( $m[0], $toVariant );
-               $mstart = $m[1] + strlen( $m[0] );
-
-               // enable convertsion of '<img alt="xxxx" ... '
-               // or '<span title="xxxx" ... '
-               $captionpattern  = '/\s(title|alt)\s*=\s*"([\s\S]*?)"/';
-
-               $trtext = '';
-               $trtextmark = "\0";
-               $notrtext = array();
-               foreach ( $matches as $m ) {
-                       $mark = substr( $text, $mstart, $m[1] - $mstart );
-                       $mark = preg_replace_callback( $captionpattern,
-                                                                                  array( &$this, 'captionConvert' ),
-                                                                                  $mark );
-                       // Let's convert the trtext only once,
-                       // it would give us more performance improvement
-                       $notrtext[] = $mark;
-                       $trtext .= $m[0] . $trtextmark;
-                       $mstart = $m[1] + strlen( $m[0] );
-               }
-               $notrtext[] = '';
-               $trtext = $this->translate( $trtext, $toVariant );
-               $trtext = StringUtils::explode( $trtextmark, $trtext );
-               foreach ( $trtext as $t ) {
-                       $ret .= array_shift( $notrtext );
-                       $ret .= $t;
-               }
-               wfProfileOut( $fname );
-               return $ret;
+                       // Queue the part before the markup for translation in a batch
+                       $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
+
+                       // Advance to the next position
+                       $startPos = $elementPos + strlen( $element );           
+
+                       // Translate any alt or title attributes inside the matched element
+                       if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, 
+                               $elementMatches ) ) 
+                       {
+                               $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+                               $changed = false;
+                               foreach ( array( 'title', 'alt' ) as $attrName ) {
+                                       if ( !isset( $attrs[$attrName] ) ) {
+                                               continue;
+                                       }
+                                       $attr = $attrs[$attrName];
+                                       // Don't convert URLs
+                                       if ( !strpos( $attr, '://' ) ) {
+                                               $attr = $this->translate( $attr, $toVariant );
+                                       }
+                                       
+                                       // Remove HTML tags to avoid disrupting the layout
+                                       $attr = preg_replace( '/<[^>]+>/', '', $attr );
+                                       if ( $attr !== $attrs[$attrName] ) {
+                                               $attrs[$attrName] = $attr;
+                                               $changed = true;
+                                       }
+                               }
+                               if ( $changed ) {
+                                       $element = $elementMatches[1] . Html::expandAttributes( $attrs ) . 
+                                               $elementMatches[3];
+                               }
+                       }
+                       $literalBlob .= $element . "\000";
+               }
+
+               // Do the main translation batch
+               $translatedBlob = $this->translate( $sourceBlob, $toVariant );
+
+               // Put the output back together
+               $translatedIter = StringUtils::explode( "\000", $translatedBlob );
+               $literalIter = StringUtils::explode( "\000", $literalBlob );
+               $output = '';
+               while ( $translatedIter->valid() && $literalIter->valid() ) {
+                       $output .= $translatedIter->current();
+                       $output .= $literalIter->current();
+                       $translatedIter->next();
+                       $literalIter->next();
+               }
+
+               wfProfileOut( __METHOD__ );
+               return $output;
        }
 
        /**
         * Translate a string to a variant.
-        * Doesn't process markup or do any of that other stuff, for that use
-        * convert().
+        * Doesn't parse rules or do any of that other stuff, for that use
+        * convert() or convertTo().
         *
-        * @param string $text Text to convert
-        * @param string $variant Variant language code
-        * @return string Translated text
-        * @private
+        * @param $text String: text to convert
+        * @param $variant String: variant language code
+        * @return String: translated text
         */
-       function translate( $text, $variant ) {
+       public function translate( $text, $variant ) {
                wfProfileIn( __METHOD__ );
                // If $text is empty or only includes spaces, do nothing
                // Otherwise translate it
                if ( trim( $text ) ) {
-                       if ( !$this->mTablesLoaded ) {
-                               $this->loadTables();
-                       }
+                       $this->loadTables();
                        $text = $this->mTables[$variant]->replace( $text );
                }
                wfProfileOut( __METHOD__ );
@@ -386,78 +445,60 @@ class LanguageConverter {
        }
 
        /**
-        * Convert text to all supported variants.
+        * Call translate() to convert text to all valid variants.
         *
-        * @param string $text the text to be converted
-        * @return array of string
-        * @public
+        * @param $text String: the text to be converted
+        * @return Array: variant => converted text
         */
-       function autoConvertToAllVariants( $text ) {
-               $fname = 'LanguageConverter::autoConvertToAllVariants';
-               wfProfileIn( $fname );
-               if ( !$this->mTablesLoaded ) {
-                       $this->loadTables();
-               }
+       public function autoConvertToAllVariants( $text ) {
+               wfProfileIn( __METHOD__ );
+               $this->loadTables();
 
                $ret = array();
                foreach ( $this->mVariants as $variant ) {
                        $ret[$variant] = $this->translate( $text, $variant );
                }
 
-               wfProfileOut( $fname );
+               wfProfileOut( __METHOD__ );
                return $ret;
        }
 
        /**
-        * Convert link text to all supported variants.
+        * Convert link text to all valid variants.
+        * Originally, this function only converted text outside the
+        * "-{" "}-" markup. Since "{" and "}" are not allowed in
+        * titles, the whole text always gets converted anyway, so that
+        * feature was removed and the function deprecated.
         *
-        * @param string $text the text to be converted
-        * @return array of string
-        * @public
+        * @param $text String: the text to be converted
+        * @return Array: variant => converted text
+        * @deprecated since 1.17 Use autoConvertToAllVariants() instead
         */
-       function convertLinkToAllVariants( $text ) {
-               if ( !$this->mTablesLoaded ) {
-                       $this->loadTables();
-               }
-
-               $ret = array();
-               $tarray = explode( $this->mMarkup['begin'], $text );
-               $tfirst = array_shift( $tarray );
-
-               foreach ( $this->mVariants as $variant ) {
-                       $ret[$variant] = $this->translate( $tfirst, $variant );
-               }
-
-               foreach ( $tarray as $txt ) {
-                       $marked = explode( $this->mMarkup['end'], $txt, 2 );
-
-                       foreach ( $this->mVariants as $variant ) {
-                               $ret[$variant] .= $this->mMarkup['begin'] . $marked[0] .
-                                       $this->mMarkup['end'];
-                               if ( array_key_exists( 1, $marked ) ) {
-                                       $ret[$variant] .= $this->translate( $marked[1], $variant );
-                               }
-                       }
-
-               }
-
-               return $ret;
+       public function convertLinkToAllVariants( $text ) {
+               return $this->autoConvertToAllVariants( $text );
        }
 
        /**
-        * Prepare manual conversion table.
-        * @private
+        * Apply manual conversion rules.
+        *
+        * @param $convRule Object: Object of ConverterRule
         */
-       function applyManualConv( $convRule ) {
-               // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom
-               // conversion in title
-               $this->mConvRuleTitle = $convRule->getTitle();
-
-               // apply manual conversion table to global table
+       protected function applyManualConv( $convRule ) {
+               // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to customize
+               // title conversion.
+               // Bug 24072: $mConvRuleTitle was overwritten by other manual
+               // rule(s) not for title, this breaks the title conversion.
+               $newConvRuleTitle = $convRule->getTitle();
+               if ( $newConvRuleTitle ) {
+                       // So I add an empty check for getTitle()
+                       $this->mConvRuleTitle = $newConvRuleTitle;
+               }
+
+               // merge/remove manual conversion rules to/from global table
                $convTable = $convRule->getConvTable();
                $action = $convRule->getRulesAction();
                foreach ( $convTable as $variant => $pair ) {
-                       if ( !in_array( $variant, $this->mVariants ) ) {
+                       if ( !$this->validateVariant( $variant ) ) {
                                continue;
                        }
 
@@ -477,47 +518,33 @@ class LanguageConverter {
        }
 
        /**
-        * Convert namespace.
-        * @param string $title the title included namespace
-        * @return array of string
-        * @private
-        */
-       function convertNamespace( $title, $variant ) {
-               $splittitle = explode( ':', $title );
-               if ( count( $splittitle ) < 2 ) {
-                       return $title;
-               }
-               if ( isset( $this->mNamespaceTables[$variant][$splittitle[0]] ) ) {
-                       $splittitle[0] = $this->mNamespaceTables[$variant][$splittitle[0]];
-               }
-               $ret = implode( ':', $splittitle );
-               return $ret;
-       }
-
-       /**
-        * Convert a text fragment.
+        * Auto convert a Title object to a readable string in the
+        * preferred variant.
         *
-        * @param string $text text to be converted
-        * @param string $plang preferred variant
-        * @return string converted text
-        * @private
+        * @param $title Object: a Title object
+        * @return String: converted title text
         */
-       function convertFragment( $text, $plang ) {
-               $marked = explode( $this->mMarkup['begin'], $text, 2 );
-               $converted = '';
-
-               $converted .= $this->autoConvert( $marked[0], $plang );
-
-               if ( array_key_exists( 1, $marked ) ) {
-                       $crule = new ConverterRule( $marked[1], $this );
-                       $crule->parse( $plang );
-                       $converted .= $crule->getDisplay();
-                       $this->applyManualConv( $crule );
+       public function convertTitle( $title ) {
+               $variant = $this->getPreferredVariant();
+               $index = $title->getNamespace();
+               if ( $index === NS_MAIN ) {
+                       $text = '';
                } else {
-                       $converted .= $this->mMarkup['end'];
+                       // first let's check if a message has given us a converted name
+                       $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
+                       if ( $nsConvMsg->exists() ) {
+                               $text = $nsConvMsg->plain();
+                       } else {
+                               // the message does not exist, try to retrieve it from the
+                               // current variant's namespace names.
+                               $langObj = $this->mLangObj->factory( $variant );
+                               $text = $langObj->getFormattedNsText( $index );
+                       }
+                       $text .= ':';
                }
-
-               return $converted;
+               $text .= $title->getText();
+               $text = $this->translate( $text, $variant );
+               return $text;
        }
 
        /**
@@ -531,42 +558,152 @@ class LanguageConverter {
         * -{flags|code1:text1;code2:text2;...}-  or
         * -{text}- in which case no conversion should take place for text
         *
-        * @param string $text text to be converted
-        * @return string converted text
-        * @public
+        * @param $text String: text to be converted
+        * @return String: converted text
         */
-       function convert( $text ) {
+       public function convert( $text ) {
+               $variant = $this->getPreferredVariant();
+               return $this->convertTo( $text, $variant );
+       }
+
+       /**
+        * Same as convert(), but takes an extra parameter specifying the target variant.
+        *
+        * @param $text String: text to be converted
+        * @param $variant String: the target variant code
+        * @return String: converted text
+        */
+       public function convertTo( $text, $variant ) {
                global $wgDisableLangConversion;
-               if ( $wgDisableLangConversion ) return $text;
+               if ( $wgDisableLangConversion || $this->guessVariant( $text, $variant ) ) {
+                       return $text;
+               }
+               return $this->recursiveConvertTopLevel( $text, $variant );
+       }
 
-               $plang = $this->getPreferredVariant();
+       /**
+        * Recursively convert the text outside of -{ ... }- markup. Nested
+        * markup is allowed for customizing rules.
+        *
+        * @param $text String: text to be converted
+        * @param $variant String: the target variant code
+        * @param $depth Integer: depth of recursion
+        * @return String: converted text
+        */
+       protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
+               $startPos = 0;
+               $out = '';
+               $length = strlen( $text );
+               while ( $startPos < $length ) {
+                       $pos = strpos( $text, '-{', $startPos );
+
+                       if ( $pos === false ) {
+                               // No more markup, append final segment
+                               $out .= $this->autoConvert( substr( $text, $startPos ), $variant );
+                               return $out;
+                       }
+
+                       // Markup found
+                       // Append initial segment
+                       $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant );
 
-               $tarray = StringUtils::explode( $this->mMarkup['end'], $text );
-               $converted = '';
+                       // Advance position
+                       $startPos = $pos;
 
-               foreach ( $tarray as $txt ) {
-                       $converted .= $this->convertFragment( $txt, $plang );
+                       // Do recursive conversion
+                       $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                }
 
-               // Remove the last delimiter (wasn't real)
-           $converted = substr( $converted, 0, - strlen( $this->mMarkup['end'] ) );
-               return $converted;
+               return $out;
        }
 
        /**
-        * If a language supports multiple variants, it is
-        * possible that non-existing link in one variant
-        * actually exists in another variant. This function
-        * tries to find it. See e.g. LanguageZh.php
+        * Recursively convert text on the inside.
         *
-        * @param string $link the name of the link
-        * @param mixed $nt the title object of the link
-        * @param boolean $ignoreOtherCond: to disable other conditions when
-        *      we need to transclude a template or update a category's link
-        * @return null the input parameters may be modified upon return
-        * @public
+        * @param $text String: text to be converted
+        * @param $variant String: the target variant code
+        * @param $depth Integer: depth of recursion
+        * @return String: converted text
         */
-       function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+       protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
+               // $startPos is passed by reference: on entry it must point at a '-{'
+               // in $text; on return it points just past the part that was consumed
+               // (after the matching '}-', or at the end of $text for an unclosed rule).
+
+               // Quick sanity check (no function calls)
+               if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
+                       throw new MWException( __METHOD__ . ': invalid input string' );
+               }
+
+               $startPos += 2;
+               $inner = '';
+               $warningDone = false;
+               $length = strlen( $text );
+
+               while ( $startPos < $length ) {
+                       $m = false;
+                       preg_match( '/-\{|\}-/', $text, $m,  PREG_OFFSET_CAPTURE, $startPos );
+                       if ( !$m ) {
+                               // Unclosed rule
+                               break;
+                       }
+
+                       $token = $m[0][0];
+                       $pos = $m[0][1];
+
+                       // Markup found
+                       // Append initial segment
+                       $inner .= substr( $text, $startPos, $pos - $startPos );
+
+                       // Advance position
+                       $startPos = $pos;
+
+                       switch ( $token ) {
+                               case '-{':
+                                       // Check max depth
+                                       if ( $depth >= $this->mMaxDepth ) {
+                                               $inner .= '-{';
+                                               if ( !$warningDone ) {
+                                                       $inner .= '<span class="error">' .
+                                                               wfMsgForContent( 'language-converter-depth-warning',
+                                                                       $this->mMaxDepth ) .
+                                                               '</span>';
+                                                       $warningDone = true;
+                                               }
+                                               $startPos += 2;
+                                               // A bare "continue" would target the switch (PHP treats
+                                               // it like "break" and warns since 7.3); "continue 2"
+                                               // explicitly resumes the enclosing while loop.
+                                               continue 2;
+                                       }
+                                       // Recursively parse another rule
+                                       $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
+                                       break;
+                               case '}-':
+                                       // Apply the rule
+                                       $startPos += 2;
+                                       $rule = new ConverterRule( $inner, $this );
+                                       $rule->parse( $variant );
+                                       $this->applyManualConv( $rule );
+                                       return $rule->getDisplay();
+                               default:
+                                       throw new MWException( __METHOD__ . ': invalid regex match' );
+                       }
+               }
+
+               // Unclosed rule: emit the opening markup literally and auto-convert
+               // everything collected after it.
+               if ( $startPos < $length ) {
+                       $inner .= substr( $text, $startPos );
+               }
+               $startPos = $length;
+               return '-{' . $this->autoConvert( $inner, $variant );
+       }
+
+       /**
+        * If a language supports multiple variants, it is possible that
+        * non-existing link in one variant actually exists in another variant.
+        * This function tries to find it. See e.g. LanguageZh.php
+        *
+        * @param $link String: the name of the link
+        * @param $nt Mixed: the title object of the link
+        * @param $ignoreOtherCond Boolean: to disable other conditions when
+        *              we need to transclude a template or update a category's link
+        * @return Null, the input parameters may be modified upon return
+        */
+       public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
                # If the article has already existed, there is no need to
                # check it again, otherwise it may cause a fault.
                if ( is_object( $nt ) && $nt->exists() ) {
@@ -599,7 +736,7 @@ class LanguageConverter {
                }
 
                $variants = $this->autoConvertToAllVariants( $link );
-               if ( $variants == false ) { // give up
+               if ( !$variants ) { // give up
                        return;
                }
 
@@ -627,14 +764,26 @@ class LanguageConverter {
                }
        }
 
-    /**
+       /**
         * Returns language specific hash options.
-        *
-        * @public
         */
-       function getExtraHashOptions() {
+       public function getExtraHashOptions() {
                $variant = $this->getPreferredVariant();
-               return '!' . $variant ;
+               return '!' . $variant;
+       }
+
+       /**
+        * Guess if a text is written in a variant. This should be implemented in subclasses.
+        *
+        * @param string        $text the text to be checked
+        * @param string        $variant language code of the variant to be checked for
+        * @return bool true if $text appears to be written in $variant, false if not
+        *
+        * @author Nikola Smolenski <smolensk@eunet.rs>
+        * @since 1.18
+        */
+       public function guessVariant($text, $variant) {
+               return false;
        }
 
        /**
@@ -645,32 +794,33 @@ class LanguageConverter {
         */
        function loadDefaultTables() {
                $name = get_class( $this );
-               wfDie( "Must implement loadDefaultTables() method in class $name" );
+               throw new MWException( "Must implement loadDefaultTables() method in class $name" );
        }
 
        /**
         * Load conversion tables either from the cache or the disk.
         * @private
+        * @param $fromCache Boolean: load from memcached? Defaults to true.
         */
-       function loadTables( $fromcache = true ) {
-               global $wgMemc;
+       function loadTables( $fromCache = true ) {
                if ( $this->mTablesLoaded ) {
                        return;
                }
+               global $wgMemc;
                wfProfileIn( __METHOD__ );
                $this->mTablesLoaded = true;
                $this->mTables = false;
-               if ( $fromcache ) {
+               if ( $fromCache ) {
                        wfProfileIn( __METHOD__ . '-cache' );
                        $this->mTables = $wgMemc->get( $this->mCacheKey );
                        wfProfileOut( __METHOD__ . '-cache' );
                }
                if ( !$this->mTables
-                        || !isset( $this->mTables[self::CACHE_VERSION_KEY] ) ) {
+                        || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
                        wfProfileIn( __METHOD__ . '-recache' );
                        // not in cache, or we need a fresh reload.
-                       // we will first load the default tables
-                       // then update them using things in MediaWiki:Zhconversiontable/*
+                       // We will first load the default tables
+                       // then update them using things in MediaWiki:Conversiontable/*
                        $this->loadDefaultTables();
                        foreach ( $this->mVariants as $var ) {
                                $cached = $this->parseCachedTable( $var );
@@ -686,13 +836,12 @@ class LanguageConverter {
                wfProfileOut( __METHOD__ );
        }
 
-    /**
-        * Hook for post processig after conversion tables are loaded.
-        *
+       /**
+        * Hook for post processing after conversion tables are loaded.
         */
        function postLoadTables() { }
 
-    /**
+       /**
         * Reload the conversion tables.
         *
         * @private
@@ -705,7 +854,6 @@ class LanguageConverter {
                $this->loadTables( false );
        }
 
-
        /**
         * Parse the conversion table stored in the cache.
         *
@@ -716,18 +864,16 @@ class LanguageConverter {
         *                      ...
         *              }-
         *
-        *      To make the tables more manageable, subpages are allowed
-        *      and will be parsed recursively if $recursive == true.
+        * To make the tables more manageable, subpages are allowed
+        * and will be parsed recursively if $recursive == true.
         *
+        * @param $code String: language code
+        * @param $subpage String: subpage name
+        * @param $recursive Boolean: parse subpages recursively? Defaults to true.
         */
        function parseCachedTable( $code, $subpage = '', $recursive = true ) {
-               global $wgMessageCache;
                static $parsed = array();
 
-               if ( !is_object( $wgMessageCache ) ) {
-                       return array();
-               }
-
                $key = 'Conversiontable/' . $code;
                if ( $subpage ) {
                        $key .= '/' . $subpage;
@@ -737,10 +883,17 @@ class LanguageConverter {
                }
 
                if ( strpos( $code, '/' ) === false ) {
-                       $txt = $wgMessageCache->get( 'Conversiontable', true, $code );
+                       $txt = MessageCache::singleton()->get( 'Conversiontable', true, $code );
+                       if ( $txt === false ) {
+                               # @todo FIXME: This method doesn't seem to be expecting
+                               # this possible outcome...
+                               $txt = '&lt;Conversiontable&gt;';
+                       }
                } else {
-                       $title = Title::makeTitleSafe( NS_MEDIAWIKI,
-                                                                                  "Conversiontable/$code" );
+                       $title = Title::makeTitleSafe(
+                               NS_MEDIAWIKI,
+                               "Conversiontable/$code"
+                       );
                        if ( $title && $title->exists() ) {
                                $article = new Article( $title );
                                $txt = $article->getContents();
@@ -750,17 +903,17 @@ class LanguageConverter {
                }
 
                // get all subpage links of the form
-               // [[MediaWiki:conversiontable/zh-xx/...|...]]
+               // [[MediaWiki:Conversiontable/zh-xx/...|...]]
                $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
                        ':Conversiontable';
-               $subs = explode( '[[', $txt );
+               $subs = StringUtils::explode( '[[', $txt );
                $sublinks = array();
                foreach ( $subs as $sub ) {
                        $link = explode( ']]', $sub, 2 );
                        if ( count( $link ) != 2 ) {
                                continue;
                        }
-                       $b = explode( '|', $link[0] );
+                       $b = explode( '|', $link[0], 2 );
                        $b = explode( '/', trim( $b[0] ), 3 );
                        if ( count( $b ) == 3 ) {
                                $sublink = $b[2];
@@ -773,20 +926,25 @@ class LanguageConverter {
                        }
                }
 
-
                // parse the mappings in this page
-               $blocks = explode( $this->mMarkup['begin'], $txt );
-               array_shift( $blocks );
+               $blocks = StringUtils::explode( '-{', $txt );
                $ret = array();
+               $first = true;
                foreach ( $blocks as $block ) {
-                       $mappings = explode( $this->mMarkup['end'], $block, 2 );
+                       if ( $first ) {
+                               // Skip the part before the first -{
+                               $first = false;
+                               continue;
+                       }
+                       $mappings = explode( '}-', $block, 2 );
                        $stripped = str_replace( array( "'", '"', '*', '#' ), '',
                                                                         $mappings[0] );
-                       $table = explode( ';', $stripped );
+                       $table = StringUtils::explode( ';', $stripped );
                        foreach ( $table as $t ) {
-                               $m = explode( '=>', $t );
-                               if ( count( $m ) != 2 )
+                               $m = explode( '=>', $t, 3 );
+                               if ( count( $m ) != 2 ) {
                                        continue;
+                               }
                                // trim any trailling comments starting with '//'
                                $tt = explode( '//', $m[1], 2 );
                                $ret[trim( $m[0] )] = trim( $tt[0] );
@@ -794,7 +952,6 @@ class LanguageConverter {
                }
                $parsed[$key] = true;
 
-
                // recursively parse the subpages
                if ( $recursive ) {
                        foreach ( $sublinks as $link ) {
@@ -805,7 +962,7 @@ class LanguageConverter {
 
                if ( $this->mUcfirst ) {
                        foreach ( $ret as $k => $v ) {
-                               $ret[Language::ucfirst( $k )] = Language::ucfirst( $v );
+                               $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
                        }
                }
                return $ret;
@@ -815,18 +972,17 @@ class LanguageConverter {
         * Enclose a string with the "no conversion" tag. This is used by
         * various functions in the Parser.
         *
-        * @param string $text text to be tagged for no conversion
-        * @return string the tagged text
-        * @public
+        * @param $text String: text to be tagged for no conversion
+        * @param $noParse Boolean: unused
+        * @return String: the tagged text
         */
-       function markNoConversion( $text, $noParse = false ) {
+       public function markNoConversion( $text, $noParse = false ) {
                # don't mark if already marked
-               if ( strpos( $text, $this->mMarkup['begin'] )
-                       || strpos( $text, $this->mMarkup['end'] ) ) {
+               # (compare against false: strpos() returns 0, which is falsy, when
+               # the markup is at the very start of $text)
+               if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
                        return $text;
                }
 
-               $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
+               $ret = "-{R|$text}-";
                return $ret;
        }
 
@@ -840,18 +996,29 @@ class LanguageConverter {
 
        /**
         * Hook to refresh the cache of conversion tables when
-        * MediaWiki:conversiontable* is updated.
+        * MediaWiki:Conversiontable* is updated.
         * @private
+        *
+        * @param $article Object: Article object
+        * @param $user Object: User object for the current user
+        * @param $text String: article text (?)
+        * @param $summary String: edit summary of the edit
+        * @param $isMinor Boolean: was the edit marked as minor?
+        * @param $isWatch Boolean: did the user watch this page or not?
+        * @param $section Unused
+        * @param $flags Bitfield
+        * @param $revision Object: new Revision object or null
+        * @return Boolean: true
         */
-       function OnArticleSaveComplete( $article, $user, $text, $summary, $isminor,
-                       $iswatch, $section, $flags, $revision ) {
+       function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor,
+                       $isWatch, $section, $flags, $revision ) {
                $titleobj = $article->getTitle();
                if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
                        $title = $titleobj->getDBkey();
                        $t = explode( '/', $title, 3 );
                        $c = count( $t );
                        if ( $c > 1 && $t[0] == 'Conversiontable' ) {
-                               if ( in_array( $t[1], $this->mVariants ) ) {
+                               if ( $this->validateVariant( $t[1] ) ) {
                                        $this->reloadTables();
                                }
                        }
@@ -861,15 +1028,46 @@ class LanguageConverter {
 
        /**
         * Armour rendered math against conversion.
-        * Wrap math into rawoutput -{R| math }- syntax.
-        * @public
+        * Escape special chars in parsed math text (which in most cases consists of img elements).
+        *
+        * @param $text String: text to armour against conversion
+        * @return String: armoured text where { and } have been converted to
+        *                 &#123; and &#125;
         */
-       function armourMath( $text ) {
-               // we need to convert '-{' and '}-' to '-&#123;' and '&#125;-'
-               // to avoid a unwanted '}-' appeared after the math-image.
+       public function armourMath( $text ) {
+               // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
+               // any unwanted markup appearing in the math image tag.
                $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
-               $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
-               return $ret;
+               return $text;
+       }
+
+       /**
+        * Get the cached separator pattern for ConverterRule::parseRules()
+        *
+        * The pattern is built once from $this->mVariants and then stored in
+        * $this->mVarSeparatorPattern for subsequent calls.
+        *
+        * @return String: PCRE pattern suitable for preg_split()
+        */
+       function getVarSeparatorPattern() {
+               if ( is_null( $this->mVarSeparatorPattern ) ) {
+                       // varsep_pattern for preg_split:
+                       // text should be split by ";" only if a valid variant
+                       // name exists after the markup, for example:
+                       //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
+                       //      <span style="font-size:120%;">yyy</span>;}-
+                       // we should split it as:
+                       //  array(
+                       //        [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
+                       //        [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
+                       //        [2] => ''
+                       //       )
+                       $pat = '/;\s*(?=';
+                       foreach ( $this->mVariants as $variant ) {
+                               // Quote the code so that it is always matched literally,
+                               // even if it contains PCRE metacharacters or the delimiter.
+                               $code = preg_quote( $variant, '/' );
+                               // zh-hans:xxx;zh-hant:yyy
+                               $pat .= $code . '\s*:|';
+                               // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
+                               $pat .= '[^;]*?=>\s*' . $code . '\s*:|';
+                       }
+                       $pat .= '\s*$)/';
+                       $this->mVarSeparatorPattern = $pat;
+               }
+               return $this->mVarSeparatorPattern;
        }
 }
 
@@ -887,6 +1085,7 @@ class ConverterRule {
        var $mRules = '';// string : the text of the rules
        var $mRulesAction = 'none';
        var $mFlags = array();
+       var $mVariantFlags = array();
        var $mConvTable = array();
        var $mBidtable = array();// array of the translation in each variant
        var $mUnidtable = array();// array of the translation in each variant
@@ -894,34 +1093,27 @@ class ConverterRule {
        /**
         * Constructor
         *
-        * @param string $text the text between -{ and }-
-        * @param object $converter a  LanguageConverter object
-        * @access public
+        * @param $text String: the text between -{ and }-
+        * @param $converter LanguageConverter object
         */
-       function __construct( $text, $converter ) {
+       public function __construct( $text, $converter ) {
                $this->mText = $text;
                $this->mConverter = $converter;
-               foreach ( $converter->mVariants as $v ) {
-                       $this->mConvTable[$v] = array();
-               }
        }
 
        /**
         * Check if variants array in convert array.
         *
-        * @param string $variant Variant language code
-        * @return string Translated text
-        * @public
+        * @param $variants Array or string: variant language code
+        * @return String: translated text
         */
-       function getTextInBidtable( $variants ) {
-               if ( is_string( $variants ) ) {
-                       $variants = array( $variants );
-               }
-               if ( !is_array( $variants ) ) {
+       public function getTextInBidtable( $variants ) {
+               $variants = (array)$variants;
+               if ( !$variants ) {
                        return false;
                }
                foreach ( $variants as $variant ) {
-                       if ( array_key_exists( $variant, $this->mBidtable ) ) {
+                       if ( isset( $this->mBidtable[$variant] ) ) {
                                return $this->mBidtable[$variant];
                        }
                }
@@ -934,74 +1126,60 @@ class ConverterRule {
         */
        function parseFlags() {
                $text = $this->mText;
-               if ( strlen( $text ) < 2 ) {
-                       $this->mFlags = array( 'R' );
-                       $this->mRules = $text;
-                       return;
-               }
-
                $flags = array();
-               $markup = $this->mConverter->mMarkup;
-               $validFlags = $this->mConverter->mFlags;
-               $variants = $this->mConverter->mVariants;
+               $variantFlags = array();
 
-               $tt = explode( $markup['flagsep'], $text, 2 );
-               if ( count( $tt ) == 2 ) {
-                       $f = explode( $markup['varsep'], $tt[0] );
+               $sepPos = strpos( $text, '|' );
+               if ( $sepPos !== false ) {
+                       $validFlags = $this->mConverter->mFlags;
+                       $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
                        foreach ( $f as $ff ) {
                                $ff = trim( $ff );
-                               if ( array_key_exists( $ff, $validFlags )
-                                        && !in_array( $validFlags[$ff], $flags ) ) {
-                                       $flags[] = $validFlags[$ff];
+                               if ( isset( $validFlags[$ff] ) ) {
+                                       $flags[$validFlags[$ff]] = true;
                                }
                        }
-                       $rules = $tt[1];
-               } else {
-                       $rules = $text;
-               }
-
-               // check flags
-               if ( in_array( 'R', $flags ) ) {
-                       $flags = array( 'R' );// remove other flags
-               } elseif ( in_array( 'N', $flags ) ) {
-                       $flags = array( 'N' );// remove other flags
-               } elseif ( in_array( '-', $flags ) ) {
-                       $flags = array( '-' );// remove other flags
-               } elseif ( count( $flags ) == 1 && $flags[0] == 'T' ) {
-                       $flags[] = 'H';
-               } elseif ( in_array( 'H', $flags ) ) {
+                       $text = strval( substr( $text, $sepPos + 1 ) );
+               }
+
+               if ( !$flags ) {
+                       $flags['S'] = true;
+               } elseif ( isset( $flags['R'] ) ) {
+                       $flags = array( 'R' => true );// remove other flags
+               } elseif ( isset( $flags['N'] ) ) {
+                       $flags = array( 'N' => true );// remove other flags
+               } elseif ( isset( $flags['-'] ) ) {
+                       $flags = array( '-' => true );// remove other flags
+               } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
+                       $flags['H'] = true;
+               } elseif ( isset( $flags['H'] ) ) {
                        // replace A flag, and remove other flags except T
-                       $temp = array( '+', 'H' );
-                       if ( in_array( 'T', $flags ) ) {
-                               $temp[] = 'T';
+                       $temp = array( '+' => true, 'H' => true );
+                       if ( isset( $flags['T'] ) ) {
+                               $temp['T'] = true;
                        }
-                       if ( in_array( 'D', $flags ) ) {
-                               $temp[] = 'D';
+                       if ( isset( $flags['D'] ) ) {
+                               $temp['D'] = true;
                        }
                        $flags = $temp;
                } else {
-                       if ( in_array( 'A', $flags ) ) {
-                               $flags[] = '+';
-                               $flags[] = 'S';
-                       }
-                       if ( in_array( 'D', $flags ) ) {
-                               $flags = array_diff( $flags, array( 'S' ) );
+                       if ( isset( $flags['A'] ) ) {
+                               $flags['+'] = true;
+                               $flags['S'] = true;
                        }
-                       $flags_temp = array();
-                       foreach ( $variants as $variant ) {
-                               // try to find flags like "zh-hans", "zh-hant"
-                               // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
-                               if ( in_array( $variant, $flags ) )
-                                       $flags_temp[] = $variant;
+                       if ( isset( $flags['D'] ) ) {
+                               unset( $flags['S'] );
                        }
-                       if ( count( $flags_temp ) !== 0 ) {
-                               $flags = $flags_temp;
+                       // try to find flags like "zh-hans", "zh-hant"
+                       // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
+                       $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
+                       if ( $variantFlags ) {
+                               $variantFlags = array_flip( $variantFlags );
+                               $flags = array();
                        }
                }
-               if ( count( $flags ) == 0 ) {
-                       $flags = array( 'S' );
-               }
-               $this->mRules = $rules;
+               $this->mVariantFlags = $variantFlags;
+               $this->mRules = $text;
                $this->mFlags = $flags;
        }
 
@@ -1011,50 +1189,28 @@ class ConverterRule {
         */
        function parseRules() {
                $rules = $this->mRules;
-               $flags = $this->mFlags;
                $bidtable = array();
                $unidtable = array();
-               $markup = $this->mConverter->mMarkup;
                $variants = $this->mConverter->mVariants;
-
-               // varsep_pattern for preg_split:
-               // text should be splited by ";" only if a valid variant
-               // name exist after the markup, for example:
-               //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
-        //    <span style="font-size:120%;">yyy</span>;}-
-               // we should split it as:
-               //  array(
-               //        [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
-               //        [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
-               //        [2] => ''
-               //       )
-               $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?=';
-               foreach ( $variants as $variant ) {
-                       // zh-hans:xxx;zh-hant:yyy
-                       $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|';
-                       // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
-                       $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant
-                                                       . '\s*' . $markup['codesep'] . '|';
-               }
-               $varsep_pattern .= '\s*$)/';
+               $varsep_pattern = $this->mConverter->getVarSeparatorPattern();
 
                $choice = preg_split( $varsep_pattern, $rules );
 
                foreach ( $choice as $c ) {
-                       $v  = explode( $markup['codesep'], $c, 2 );
+                       $v  = explode( ':', $c, 2 );
                        if ( count( $v ) != 2 ) {
                                // syntax error, skip
                                continue;
                        }
                        $to = trim( $v[1] );
                        $v  = trim( $v[0] );
-                       $u  = explode( $markup['unidsep'], $v, 2 );
+                       $u  = explode( '=>', $v, 2 );
                        // if $to is empty, strtr() could return a wrong result
                        if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) {
                                $bidtable[$v] = $to;
                        } elseif ( count( $u ) == 2 ) {
                                $from = trim( $u[0] );
-                               $v    = trim( $u[1] );
+                               $v      = trim( $u[1] );
                                if ( array_key_exists( $v, $unidtable )
                                         && !is_array( $unidtable[$v] )
                                         && $to
@@ -1065,7 +1221,7 @@ class ConverterRule {
                                }
                        }
                        // syntax error, pass
-                       if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ) {
+                       if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                                $bidtable = array();
                                $unidtable = array();
                                break;
@@ -1138,7 +1294,12 @@ class ConverterRule {
         * @private
         */
        function generateConvTable() {
-               $flags = $this->mFlags;
+               // Special case optimisation
+               if ( !$this->mBidtable && !$this->mUnidtable ) {
+                       $this->mConvTable = array();
+                       return;
+               }
+
                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;
                $manLevel = $this->mConverter->mManualLevel;
@@ -1148,7 +1309,7 @@ class ConverterRule {
                        /* for bidirectional array
                                fill in the missing variants, if any,
                                with fallbacks */
-                       if ( !array_key_exists( $v, $bidtable ) ) {
+                       if ( !isset( $bidtable[$v] ) ) {
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $v );
                                $vf = $this->getTextInBidtable( $variantFallbacks );
@@ -1157,7 +1318,7 @@ class ConverterRule {
                                }
                        }
 
-                       if ( array_key_exists( $v, $bidtable ) ) {
+                       if ( isset( $bidtable[$v] ) ) {
                                foreach ( $vmarked as $vo ) {
                                        // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                                        // or -{H|zh:WordZh;zh-tw:WordTw}-
@@ -1173,35 +1334,36 @@ class ConverterRule {
                                }
                                $vmarked[] = $v;
                        }
-                       /*for unidirectional array fill to convert tables */
-                       if ( ( $manLevel[$v] == 'bidirectional'
-                                  || $manLevel[$v] == 'unidirectional' )
-                                && array_key_exists( $v, $unidtable ) ) {
-                               $ct = $this->mConvTable[$v];
-                               $this->mConvTable[$v] = array_merge( $ct, $unidtable[$v] );
+                       /* for unidirectional array fill to convert tables */
+                       if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
+                               && isset( $unidtable[$v] ) )
+                       {
+                               if ( isset( $this->mConvTable[$v] ) ) {
+                                       $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
+                               } else {
+                                       $this->mConvTable[$v] = $unidtable[$v];
+                               }
                        }
                }
        }
 
        /**
         * Parse rules and flags.
-        * @public
+        * @param $variant String: variant language code
         */
-       function parse( $variant = NULL ) {
+       public function parse( $variant = null ) {
                if ( !$variant ) {
                        $variant = $this->mConverter->getPreferredVariant();
                }
 
-               $variants = $this->mConverter->mVariants;
                $this->parseFlags();
                $flags = $this->mFlags;
 
                // convert to specified variant
                // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
-               if ( count( array_diff( $flags, $variants ) ) == 0
-                        and count( $flags ) != 0 ) {
+               if ( $this->mVariantFlags ) {
                        // check if current variant in flags
-                       if ( in_array( $variant, $flags ) ) {
+                       if ( isset( $this->mVariantFlags[$variant] ) ) {
                                // then convert <text to convert> to current language
                                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
                                                                                                                                $variant );
@@ -1211,7 +1373,7 @@ class ConverterRule {
                                        $this->mConverter->getVariantFallbacks( $variant );
                                foreach ( $variantFallbacks as $variantFallback ) {
                                        // if current variant's fallback exist in flags
-                                       if ( in_array( $variantFallback, $flags ) ) {
+                                       if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
                                                // then convert <text to convert> to fallback language
                                                $this->mRules =
                                                        $this->mConverter->autoConvert( $this->mRules,
@@ -1220,115 +1382,124 @@ class ConverterRule {
                                        }
                                }
                        }
-                       $this->mFlags = $flags = array( 'R' );
+                       $this->mFlags = $flags = array( 'R' => true );
                }
 
-               if ( !in_array( 'R', $flags ) || !in_array( 'N', $flags ) ) {
+               if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
                        // decode => HTML entities modified by Sanitizer::removeHTMLtags
                        $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
-
                        $this->parseRules();
                }
                $rules = $this->mRules;
 
-               if ( count( $this->mBidtable ) == 0
-                        && count( $this->mUnidtable ) == 0 ) {
-                       if ( in_array( '+', $flags ) || in_array( '-', $flags ) ) {
+               if ( !$this->mBidtable && !$this->mUnidtable ) {
+                       if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                                // fill all variants if text in -{A/H/-|text} without rules
                                foreach ( $this->mConverter->mVariants as $v ) {
                                        $this->mBidtable[$v] = $rules;
                                }
-                       } elseif ( !in_array( 'N', $flags ) && !in_array( 'T', $flags ) ) {
-                               $this->mFlags = $flags = array( 'R' );
+                       } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
+                               $this->mFlags = $flags = array( 'R' => true );
                        }
                }
 
-               if ( in_array( 'R', $flags ) ) {
-                       // if we don't do content convert, still strip the -{}- tags
-                       $this->mRuleDisplay = $rules;
-               } elseif ( in_array( 'N', $flags ) ) {
-                       // proces N flag: output current variant name
-                       $this->mRuleDisplay =
-                               $this->mConverter->mVariantNames[ trim( $rules ) ];
-               } elseif ( in_array( 'D', $flags ) ) {
-                       // proces D flag: output rules description
-                       $this->mRuleDisplay = $this->getRulesDesc();
-               } elseif ( in_array( 'H', $flags ) || in_array( '-', $flags ) ) {
-                       // proces H,- flag or T only: output nothing
-                       $this->mRuleDisplay = '';
-               } elseif ( in_array( 'S', $flags ) ) {
-                       $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
-               } else {
-                       $this->mRuleDisplay = $this->mManualCodeError;
-               }
-               // process T flag
-               if ( in_array( 'T', $flags ) ) {
-                       $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
-               }
-
-               if ( in_array( '-', $flags ) ) {
-                       $this->mRulesAction = 'remove';
+               $this->mRuleDisplay = false;
+               foreach ( $flags as $flag => $unused ) {
+                       switch ( $flag ) {
+                               case 'R':
+                                       // if we don't do content convert, still strip the -{}- tags
+                                       $this->mRuleDisplay = $rules;
+                                       break;
+                               case 'N':
+                                       // process N flag: output current variant name
+                                       $ruleVar = trim( $rules );
+                                       if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
+                                               $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
+                                       } else {
+                                               $this->mRuleDisplay = '';
+                                       }
+                                       break;
+                               case 'D':
+                                       // process D flag: output rules description
+                                       $this->mRuleDisplay = $this->getRulesDesc();
+                                       break;
+                               case 'H':
+                                       // process H,- flag or T only: output nothing
+                                       $this->mRuleDisplay = '';
+                                       break;
+                               case '-':
+                                       $this->mRulesAction = 'remove';
+                                       $this->mRuleDisplay = '';
+                                       break;
+                               case '+':
+                                       $this->mRulesAction = 'add';
+                                       $this->mRuleDisplay = '';
+                                       break;
+                               case 'S':
+                                       $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
+                                       break;
+                               case 'T':
+                                       $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
+                                       $this->mRuleDisplay = '';
+                                       break;
+                               default:
+                                       // ignore unknown flags (but see error case below)
+                       }
                }
-               if ( in_array( '+', $flags ) ) {
-                       $this->mRulesAction = 'add';
+               if ( $this->mRuleDisplay === false ) {
+                       $this->mRuleDisplay = $this->mManualCodeError;
                }
 
                $this->generateConvTable();
        }
 
        /**
+        * Unimplemented stub: the body is empty, so a call yields null.
+        * Presumably meant to report whether this rule defines any conversion
+        * rules -- confirm the intended contract before implementing.
+        *
-        * @public
+        * @todo FIXME: code this function :)
         */
-       function hasRules() {
+       public function hasRules() {
                // TODO: no implementation yet; nothing is returned
        }
 
        /**
         * Get display text on markup -{...}-
+        *
+        * This is the mRuleDisplay value computed by parse(). Depending on the
+        * flags it is the raw rule text ('R'), a variant name or '' ('N'), the
+        * rules description ('D'), the converted string ('S'), an empty string
+        * ('H', '-', '+', 'T'), or mManualCodeError when no recognised flag
+        * assigned a display value.
+        *
+        * @return Mixed: current value of mRuleDisplay
-        * @public
         */
-       function getDisplay() {
+       public function getDisplay() {
                return $this->mRuleDisplay;
        }
 
        /**
         * Get converted title.
+        *
+        * parse() assigns mRuleTitle from getRuleConvertedStr( $variant ) when
+        * the 'T' flag is present. What it holds when 'T' was not given is not
+        * visible from here -- check the property's initial value.
+        *
+        * @return Mixed: current value of mRuleTitle
-        * @public
         */
-       function getTitle() {
+       public function getTitle() {
                return $this->mRuleTitle;
        }
 
        /**
         * Return how to deal with the conversion rules.
+        *
+        * parse() sets mRulesAction to 'add' when the '+' flag is present and
+        * to 'remove' when the '-' flag is present; other flags leave it
+        * untouched.
+        *
+        * @return Mixed: current value of mRulesAction
-        * @public
         */
-       function getRulesAction() {
+       public function getRulesAction() {
                return $this->mRulesAction;
        }
 
        /**
-        * Get conversion table. ( bidirectional and unidirectional
-        * conversion table )
-        * @public
+        * Get conversion table. (bidirectional and unidirectional
+        * conversion table)
+        *
+        * The table is built by generateConvTable(), which parse() calls last.
+        * It is an empty array when the rule has neither bidirectional nor
+        * unidirectional entries; otherwise entries are stored per $v
+        * (presumably the variant code -- confirm against generateConvTable()).
+        *
+        * @return Array: current value of mConvTable
         */
-       function getConvTable() {
+       public function getConvTable() {
                return $this->mConvTable;
        }
 
        /**
         * Get conversion rules string.
+        *
+        * mRules is first set by parseFlags() and may then be rewritten by
+        * parse(): it is replaced with autoConvert() output when variant flags
+        * are given, and '=&gt;' is decoded back to '=>' before the rules are
+        * parsed.
+        *
+        * @return String: current value of mRules
-        * @public
         */
-       function getRules() {
+       public function getRules() {
                return $this->mRules;
        }
 
        /**
         * Get conversion flags.
+        *
+        * In this revision flags are stored as array keys rather than values,
+        * e.g. array( 'R' => true ), so test membership with
+        * isset( $flags['R'] ) instead of in_array().
+        *
+        * @return Array: current value of mFlags, keyed by flag name
-        * @public
         */
-       function getFlags() {
+       public function getFlags() {
                return $this->mFlags;
        }
 }