Accept BCP 47 codes as aliases for nonstandard variants
author C. Scott Ananian <cscott@cscott.net>
Tue, 3 Jul 2018 20:12:38 +0000 (16:12 -0400)
committer C. Scott Ananian <cscott@cscott.net>
Fri, 13 Jul 2018 21:43:20 +0000 (17:43 -0400)
The browser Accept-Language header uses BCP 47 codes, which don't
precisely match our internal mediawiki variant names in a number of
places.  Allow proper BCP 47 codes to alias our internal variants
for: Accept-Language parsing, URL parsing, user preferences, and
explicit enumeration of codes in LanguageConverter rules.

Change-Id: I8468a56d5b88f5786abd0a17b67bda2f1687fd0c

languages/FakeConverter.php
languages/LanguageConverter.php
tests/phpunit/languages/LanguageConverterTest.php

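diff --git a/languages/FakeConverter.php b/languages/FakeConverter.php
index 22377c2..6d98920 100644
--- a/languages/FakeConverter.php
+++ b/languages/FakeConverter.php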
@@ -116,6 +116,10 @@ class FakeConverter {
        }
 
        function validateVariant( $variant = null ) {
+               if ( $variant === null ) {
+                       return null;
+               }
+               $variant = strtolower( $variant );
                return $variant === $this->mLang->getCode() ? $variant : null;
        }
 
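diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php
index dcc2cf3..494280c 100644
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php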
@@ -175,11 +175,13 @@ class LanguageConverter {
                        $req = $this->validateVariant( $wgDefaultLanguageVariant );
                }
 
+               $req = $this->validateVariant( $req );
+
                // This function, unlike the other get*Variant functions, is
                // not memoized (i.e. there return value is not cached) since
                // new information might appear during processing after this
                // is first called.
-               if ( $this->validateVariant( $req ) ) {
+               if ( $req ) {
                        return $req;
                }
                return $this->mMainLanguageCode;
@@ -215,9 +217,25 @@ class LanguageConverter {
         * @return mixed Returns the variant if it is valid, null otherwise
         */
        public function validateVariant( $variant = null ) {
-               if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
+               if ( $variant === null ) {
+                       return null;
+               }
+               // Our internal variants are always lower-case; the variant we
+               // are validating may have mixed case.
+               $variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
+               if ( in_array( $variant, $this->mVariants ) ) {
                        return $variant;
                }
+               // Browsers are supposed to use the BCP 47 standard in the
+               // Accept-Language header, but not all of our internal
+               // MediaWiki variant codes are BCP 47.  Map a BCP 47 code
+               // to our internal code.
+               foreach ( $this->mVariants as $v ) {
+                       // Case-insensitive match (BCP 47 is mixed case)
+                       if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
+                               return $v;
+                       }
+               }
                return null;
        }
 
@@ -293,7 +311,7 @@ class LanguageConverter {
                        return $this->mHeaderVariant;
                }
 
-               // see if some supported language variant is set in the
+               // See if some supported language variant is set in the
                // HTTP header.
                $languages = array_keys( $wgRequest->getAcceptLang() );
                if ( empty( $languages ) ) {
@@ -545,17 +563,18 @@ class LanguageConverter {
                $convTable = $convRule->getConvTable();
                $action = $convRule->getRulesAction();
                foreach ( $convTable as $variant => $pair ) {
-                       if ( !$this->validateVariant( $variant ) ) {
+                       $v = $this->validateVariant( $variant );
+                       if ( !$v ) {
                                continue;
                        }
 
                        if ( $action == 'add' ) {
                                // More efficient than array_merge(), about 2.5 times.
                                foreach ( $pair as $from => $to ) {
-                                       $this->mTables[$variant]->setPair( $from, $to );
+                                       $this->mTables[$v]->setPair( $from, $to );
                                }
                        } elseif ( $action == 'remove' ) {
-                               $this->mTables[$variant]->removeArray( $pair );
+                               $this->mTables[$v]->removeArray( $pair );
                        }
                }
        }
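diff --git a/tests/phpunit/languages/LanguageConverterTest.php b/tests/phpunit/languages/LanguageConverterTest.php
index 82ab7de..b5db2ec 100644
--- a/tests/phpunit/languages/LanguageConverterTest.php
+++ b/tests/phpunit/languages/LanguageConverterTest.php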
@@ -20,7 +20,9 @@ class LanguageConverterTest extends MediaWikiLangTestCase {
                $this->lang = new LanguageToTest();
                $this->lc = new TestConverter(
                        $this->lang, 'tg',
-                       [ 'tg', 'tg-latn' ]
+                       # 'sgs' checks that its deprecated alias 'bat-smg' is accepted;
+                       # 'simple' (not a BCP 47 code) checks that 'en-simple' is accepted.
+                       [ 'tg', 'tg-latn', 'sgs', 'simple' ]
                );
        }
 
@@ -38,6 +40,39 @@ class LanguageConverterTest extends MediaWikiLangTestCase {
                $this->assertEquals( 'tg', $this->lc->getPreferredVariant() );
        }
 
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        * @covers LanguageConverter::getURLVariant
+        */
+       public function testGetPreferredVariantUrl() {
+               global $wgRequest;
+               $wgRequest->setVal( 'variant', 'tg-latn' );
+
+               $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() );
+       }
+
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        * @covers LanguageConverter::getURLVariant
+        */
+       public function testGetPreferredVariantUrlDeprecated() {
+               global $wgRequest;
+               $wgRequest->setVal( 'variant', 'bat-smg' );
+
+               $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() );
+       }
+
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        * @covers LanguageConverter::getURLVariant
+        */
+       public function testGetPreferredVariantUrlBCP47() {
+               global $wgRequest;
+               $wgRequest->setVal( 'variant', 'en-simple' );
+
+               $this->assertEquals( 'simple', $this->lc->getPreferredVariant() );
+       }
+
        /**
         * @covers LanguageConverter::getPreferredVariant
         * @covers LanguageConverter::getHeaderVariant
@@ -49,6 +84,17 @@ class LanguageConverterTest extends MediaWikiLangTestCase {
                $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() );
        }
 
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        * @covers LanguageConverter::getHeaderVariant
+        */
+       public function testGetPreferredVariantHeadersBCP47() {
+               global $wgRequest;
+               $wgRequest->setHeader( 'Accept-Language', 'en-simple' );
+
+               $this->assertEquals( 'simple', $this->lc->getPreferredVariant() );
+       }
+
        /**
         * @covers LanguageConverter::getPreferredVariant
         * @covers LanguageConverter::getHeaderVariant
@@ -98,6 +144,38 @@ class LanguageConverterTest extends MediaWikiLangTestCase {
                $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() );
        }
 
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        */
+       public function testGetPreferredVariantUserOptionDeprecated() {
+               global $wgUser;
+
+               $wgUser = new User;
+               $wgUser->load(); // from 'defaults'
+               $wgUser->mId = 1;
+               $wgUser->mDataLoaded = true;
+               $wgUser->mOptionsLoaded = true;
+               $wgUser->setOption( 'variant', 'bat-smg' );
+
+               $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() );
+       }
+
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        */
+       public function testGetPreferredVariantUserOptionBCP47() {
+               global $wgUser;
+
+               $wgUser = new User;
+               $wgUser->load(); // from 'defaults'
+               $wgUser->mId = 1;
+               $wgUser->mDataLoaded = true;
+               $wgUser->mOptionsLoaded = true;
+               $wgUser->setOption( 'variant', 'en-simple' );
+
+               $this->assertEquals( 'simple', $this->lc->getPreferredVariant() );
+       }
+
        /**
         * @covers LanguageConverter::getPreferredVariant
         * @covers LanguageConverter::getUserVariant
@@ -116,6 +194,42 @@ class LanguageConverterTest extends MediaWikiLangTestCase {
                $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() );
        }
 
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        * @covers LanguageConverter::getUserVariant
+        */
+       public function testGetPreferredVariantUserOptionForForeignLanguageDeprecated() {
+               global $wgContLang, $wgUser;
+
+               $wgContLang = Language::factory( 'en' );
+               $wgUser = new User;
+               $wgUser->load(); // from 'defaults'
+               $wgUser->mId = 1;
+               $wgUser->mDataLoaded = true;
+               $wgUser->mOptionsLoaded = true;
+               $wgUser->setOption( 'variant-tg', 'bat-smg' );
+
+               $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() );
+       }
+
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        * @covers LanguageConverter::getUserVariant
+        */
+       public function testGetPreferredVariantUserOptionForForeignLanguageBCP47() {
+               global $wgContLang, $wgUser;
+
+               $wgContLang = Language::factory( 'en' );
+               $wgUser = new User;
+               $wgUser->load(); // from 'defaults'
+               $wgUser->mId = 1;
+               $wgUser->mDataLoaded = true;
+               $wgUser->mOptionsLoaded = true;
+               $wgUser->setOption( 'variant-tg', 'en-simple' );
+
+               $this->assertEquals( 'simple', $this->lc->getPreferredVariant() );
+       }
+
        /**
         * @covers LanguageConverter::getPreferredVariant
         * @covers LanguageConverter::getUserVariant
@@ -145,6 +259,26 @@ class LanguageConverterTest extends MediaWikiLangTestCase {
                $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() );
        }
 
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        */
+       public function testGetPreferredVariantDefaultLanguageVariantDeprecated() {
+               global $wgDefaultLanguageVariant;
+
+               $wgDefaultLanguageVariant = 'bat-smg';
+               $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() );
+       }
+
+       /**
+        * @covers LanguageConverter::getPreferredVariant
+        */
+       public function testGetPreferredVariantDefaultLanguageVariantBCP47() {
+               global $wgDefaultLanguageVariant;
+
+               $wgDefaultLanguageVariant = 'en-simple';
+               $this->assertEquals( 'simple', $this->lc->getPreferredVariant() );
+       }
+
        /**
         * @covers LanguageConverter::getPreferredVariant
         * @covers LanguageConverter::getURLVariant
@@ -192,6 +326,8 @@ class TestConverter extends LanguageConverter {
 
        function loadDefaultTables() {
                $this->mTables = [
+                       'sgs' => new ReplacementArray(),
+                       'simple' => new ReplacementArray(),
                        'tg-latn' => new ReplacementArray( $this->table ),
                        'tg' => new ReplacementArray()
                ];