From d59f27aeab08b171e5ab6a081e763a4cad0bca04 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Tue, 3 Jul 2018 16:12:38 -0400 Subject: [PATCH] Accept BCP 47 codes as aliases for nonstandard variants The browser Accept-Language header uses BCP 47 codes, which don't precisely match our internal mediawiki variant names in a number of places. Allow proper BCP 47 codes to alias our internal variants for: Accept-Language parsing, URL parsing, user preferences, and explicit enumeration of codes in LanguageConverter rules. This is a replay of an earlier merged patch, 0818070c59eb4e8330496796b943951c01c7a573, which had to be reverted because it was based on 8380f0173e79b66f0e2afd6c49cd88afb9f4f6f3 which caused regressions in the Babel extension (T199941). Change-Id: Ica89d9547c58967747ab0fa15d4e83be5378796d --- languages/FakeConverter.php | 4 + languages/LanguageConverter.php | 31 +++- .../languages/LanguageConverterTest.php | 138 +++++++++++++++++- 3 files changed, 166 insertions(+), 7 deletions(-) diff --git a/languages/FakeConverter.php b/languages/FakeConverter.php index c4ec6382e5..2fc85e5d1b 100644 --- a/languages/FakeConverter.php +++ b/languages/FakeConverter.php @@ -116,6 +116,10 @@ class FakeConverter { } function validateVariant( $variant = null ) { + if ( $variant === null ) { + return null; + } + $variant = strtolower( $variant ); return $variant === $this->mLang->getCode() ? $variant : null; } diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index e51dca93ee..ea26c64dc7 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -175,11 +175,13 @@ class LanguageConverter { $req = $this->validateVariant( $wgDefaultLanguageVariant ); } + $req = $this->validateVariant( $req ); + // This function, unlike the other get*Variant functions, is // not memoized (i.e. there return value is not cached) since // new information might appear during processing after this // is first called. - if ( $this->validateVariant( $req ) ) { + if ( $req ) { return $req; } return $this->mMainLanguageCode; @@ -215,9 +217,25 @@ class LanguageConverter { * @return mixed Returns the variant if it is valid, null otherwise */ public function validateVariant( $variant = null ) { - if ( $variant !== null && in_array( $variant, $this->mVariants ) ) { + if ( $variant === null ) { + return null; + } + // Our internal variants are always lower-case; the variant we + // are validating may have mixed case. + $variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) ); + if ( in_array( $variant, $this->mVariants ) ) { return $variant; } + // Browsers are supposed to use BCP 47 standard in the + // Accept-Language header, but not all of our internal + // mediawiki variant codes are BCP 47. Map BCP 47 code + // to our internal code. + foreach ( $this->mVariants as $v ) { + // Case-insensitive match (BCP 47 is mixed case) + if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) { + return $v; + } + } return null; } @@ -296,7 +314,7 @@ class LanguageConverter { return $this->mHeaderVariant; } - // see if some supported language variant is set in the + // See if some supported language variant is set in the // HTTP header. $languages = array_keys( $wgRequest->getAcceptLang() ); if ( empty( $languages ) ) { @@ -548,17 +566,18 @@ class LanguageConverter { $convTable = $convRule->getConvTable(); $action = $convRule->getRulesAction(); foreach ( $convTable as $variant => $pair ) { - if ( !$this->validateVariant( $variant ) ) { + $v = $this->validateVariant( $variant ); + if ( !$v ) { continue; } if ( $action == 'add' ) { // More efficient than array_merge(), about 2.5 times. foreach ( $pair as $from => $to ) { - $this->mTables[$variant]->setPair( $from, $to ); + $this->mTables[$v]->setPair( $from, $to ); } } elseif ( $action == 'remove' ) { - $this->mTables[$variant]->removeArray( $pair ); + $this->mTables[$v]->removeArray( $pair ); } } } diff --git a/tests/phpunit/languages/LanguageConverterTest.php b/tests/phpunit/languages/LanguageConverterTest.php index 8ccacfc23a..e53b86e259 100644 --- a/tests/phpunit/languages/LanguageConverterTest.php +++ b/tests/phpunit/languages/LanguageConverterTest.php @@ -20,7 +20,9 @@ class LanguageConverterTest extends MediaWikiLangTestCase { $this->lang = new LanguageToTest(); $this->lc = new TestConverter( $this->lang, 'tg', - [ 'tg', 'tg-latn' ] + # Adding 'sgs' as a variant to ensure we handle deprecated codes + # adding 'simple' as a variant to ensure we handle non BCP 47 codes + [ 'tg', 'tg-latn', 'sgs', 'simple' ] ); } @@ -38,6 +40,39 @@ class LanguageConverterTest extends MediaWikiLangTestCase { $this->assertEquals( 'tg', $this->lc->getPreferredVariant() ); } + /** + * @covers LanguageConverter::getPreferredVariant + * @covers LanguageConverter::getURLVariant + */ + public function testGetPreferredVariantUrl() { + global $wgRequest; + $wgRequest->setVal( 'variant', 'tg-latn' ); + + $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() ); + } + + /** + * @covers LanguageConverter::getPreferredVariant + * @covers LanguageConverter::getURLVariant + */ + public function testGetPreferredVariantUrlDeprecated() { + global $wgRequest; + $wgRequest->setVal( 'variant', 'bat-smg' ); + + $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() ); + } + + /** + * @covers LanguageConverter::getPreferredVariant + * @covers LanguageConverter::getURLVariant + */ + public function testGetPreferredVariantUrlBCP47() { + global $wgRequest; + $wgRequest->setVal( 'variant', 'en-simple' ); + + $this->assertEquals( 'simple', $this->lc->getPreferredVariant() ); + } + /** * @covers LanguageConverter::getPreferredVariant * @covers LanguageConverter::getHeaderVariant @@ -49,6 +84,17 @@ class LanguageConverterTest extends MediaWikiLangTestCase { $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() ); } + /** + * @covers LanguageConverter::getPreferredVariant + * @covers LanguageConverter::getHeaderVariant + */ + public function testGetPreferredVariantHeadersBCP47() { + global $wgRequest; + $wgRequest->setHeader( 'Accept-Language', 'en-simple' ); + + $this->assertEquals( 'simple', $this->lc->getPreferredVariant() ); + } + /** * @covers LanguageConverter::getPreferredVariant * @covers LanguageConverter::getHeaderVariant @@ -98,6 +144,38 @@ class LanguageConverterTest extends MediaWikiLangTestCase { $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() ); } + /** + * @covers LanguageConverter::getPreferredVariant + */ + public function testGetPreferredVariantUserOptionDeprecated() { + global $wgUser; + + $wgUser = new User; + $wgUser->load(); // from 'defaults' + $wgUser->mId = 1; + $wgUser->mDataLoaded = true; + $wgUser->mOptionsLoaded = true; + $wgUser->setOption( 'variant', 'bat-smg' ); + + $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() ); + } + + /** + * @covers LanguageConverter::getPreferredVariant + */ + public function testGetPreferredVariantUserOptionBCP47() { + global $wgUser; + + $wgUser = new User; + $wgUser->load(); // from 'defaults' + $wgUser->mId = 1; + $wgUser->mDataLoaded = true; + $wgUser->mOptionsLoaded = true; + $wgUser->setOption( 'variant', 'en-simple' ); + + $this->assertEquals( 'simple', $this->lc->getPreferredVariant() ); + } + /** * @covers LanguageConverter::getPreferredVariant * @covers LanguageConverter::getUserVariant @@ -116,6 +194,42 @@ class LanguageConverterTest extends MediaWikiLangTestCase { $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() ); } + /** + * @covers LanguageConverter::getPreferredVariant + * @covers LanguageConverter::getUserVariant + */ + public function testGetPreferredVariantUserOptionForForeignLanguageDeprecated() { + global $wgUser; + + $this->setContentLang( 'en' ); + $wgUser = new User; + $wgUser->load(); // from 'defaults' + $wgUser->mId = 1; + $wgUser->mDataLoaded = true; + $wgUser->mOptionsLoaded = true; + $wgUser->setOption( 'variant-tg', 'bat-smg' ); + + $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() ); + } + + /** + * @covers LanguageConverter::getPreferredVariant + * @covers LanguageConverter::getUserVariant + */ + public function testGetPreferredVariantUserOptionForForeignLanguageBCP47() { + global $wgUser; + + $this->setContentLang( 'en' ); + $wgUser = new User; + $wgUser->load(); // from 'defaults' + $wgUser->mId = 1; + $wgUser->mDataLoaded = true; + $wgUser->mOptionsLoaded = true; + $wgUser->setOption( 'variant-tg', 'en-simple' ); + + $this->assertEquals( 'simple', $this->lc->getPreferredVariant() ); + } + /** * @covers LanguageConverter::getPreferredVariant * @covers LanguageConverter::getUserVariant @@ -145,6 +259,26 @@ class LanguageConverterTest extends MediaWikiLangTestCase { $this->assertEquals( 'tg-latn', $this->lc->getPreferredVariant() ); } + /** + * @covers LanguageConverter::getPreferredVariant + */ + public function testGetPreferredVariantDefaultLanguageVariantDeprecated() { + global $wgDefaultLanguageVariant; + + $wgDefaultLanguageVariant = 'bat-smg'; + $this->assertEquals( 'sgs', $this->lc->getPreferredVariant() ); + } + + /** + * @covers LanguageConverter::getPreferredVariant + */ + public function testGetPreferredVariantDefaultLanguageVariantBCP47() { + global $wgDefaultLanguageVariant; + + $wgDefaultLanguageVariant = 'en-simple'; + $this->assertEquals( 'simple', $this->lc->getPreferredVariant() ); + } + /** * @covers LanguageConverter::getPreferredVariant * @covers LanguageConverter::getURLVariant @@ -192,6 +326,8 @@ class TestConverter extends LanguageConverter { function loadDefaultTables() { $this->mTables = [ + 'sgs' => new ReplacementArray(), + 'simple' => new ReplacementArray(), 'tg-latn' => new ReplacementArray( $this->table ), 'tg' => new ReplacementArray() ]; -- 2.20.1