Map dummy language codes in sites
author Adrian Heine <adrian.heine@wikimedia.de>
Tue, 7 Jun 2016 13:15:36 +0000 (15:15 +0200)
committer Daniel Kinzler <daniel.kinzler@wikimedia.de>
Tue, 7 Jun 2016 13:43:48 +0000 (13:43 +0000)
The script that populates the sites table assumes that the
subdomain is the content language code. This is not true for
all wikis. This patch introduces a mapping to fix this issue
on the fly, based on $wgDummyLanguageCodes.

This is driven by the need to avoid "bad" language codes when
creating Wikidata items from the client by "linking" two
pages on different wikis. When we do this, we use the language
code from the sites table for the labels of the new item.
We would like to forbid "dummy" language codes in labels and
descriptions, for consistency.

Change-Id: I6452761e14d9902bb069e32d0f499bc39e680453

includes/ServiceWiring.php
includes/site/DBSiteStore.php
includes/site/FileBasedSiteLookup.php
tests/phpunit/includes/site/DBSiteStoreTest.php
tests/phpunit/includes/site/FileBasedSiteLookupTest.php

index 293e6eb..e53b9ed 100644 (file)
@@ -59,6 +59,9 @@ return [
 
        'SiteStore' => function( MediaWikiServices $services ) {
                $rawSiteStore = new DBSiteStore( $services->getDBLoadBalancer() );
+               $rawSiteStore->setLanguageCodeMapping(
+                       $services->getMainConfig()->get( 'DummyLanguageCodes' ) ?: []
+               );
 
                // TODO: replace wfGetCache with a CacheFactory service.
                // TODO: replace wfIsHHVM with a capabilities service.
index 974789f..c1c10c2 100644 (file)
@@ -40,6 +40,11 @@ class DBSiteStore implements SiteStore {
         */
        private $dbLoadBalancer;
 
+       /**
+        * @var string[]
+        */
+       private $languageCodeMapping = [];
+
        /**
         * @since 1.27
         *
@@ -96,15 +101,17 @@ class DBSiteStore implements SiteStore {
                );
 
                foreach ( $res as $row ) {
+                       $languageCode = $row->site_language === '' ? null : $row->site_language;
+                       if ( isset( $this->languageCodeMapping[ $languageCode ] ) ) {
+                               $languageCode = $this->languageCodeMapping[ $languageCode ];
+                       }
+
                        $site = Site::newForType( $row->site_type );
                        $site->setGlobalId( $row->site_global_key );
                        $site->setInternalId( (int)$row->site_id );
                        $site->setForward( (bool)$row->site_forward );
                        $site->setGroup( $row->site_group );
-                       $site->setLanguageCode( $row->site_language === ''
-                               ? null
-                               : $row->site_language
-                       );
+                       $site->setLanguageCode( $languageCode );
                        $site->setSource( $row->site_source );
                        $site->setExtraData( unserialize( $row->site_data ) );
                        $site->setExtraConfig( unserialize( $row->site_config ) );
@@ -287,4 +294,13 @@ class DBSiteStore implements SiteStore {
                return $ok;
        }
 
+       /**
+        * Set the map used to replace a site's language code when sites are loaded
+        *
+        * @param string[] $newMapping Replacement language codes, keyed by the code to replace
+        */
+       public function setLanguageCodeMapping( array $newMapping ) {
+               $this->languageCodeMapping = $newMapping;
+       }
+
 }
index 9654440..424d8e6 100644 (file)
@@ -42,6 +42,11 @@ class FileBasedSiteLookup implements SiteLookup {
         */
        private $cacheFile;
 
+       /**
+        * @var string[]
+        */
+       private $languageCodeMapping = [];
+
        /**
         * @param string $cacheFile
         */
@@ -118,13 +123,18 @@ class FileBasedSiteLookup implements SiteLookup {
         * @return Site
         */
        private function newSiteFromArray( array $data ) {
+               $languageCode = $data['language'];
+               if ( isset( $this->languageCodeMapping[ $languageCode ] ) ) {
+                       $languageCode = $this->languageCodeMapping[ $languageCode ];
+               }
+
                $siteType = array_key_exists( 'type', $data ) ? $data['type'] : Site::TYPE_UNKNOWN;
                $site = Site::newForType( $siteType );
 
                $site->setGlobalId( $data['globalid'] );
                $site->setForward( $data['forward'] );
                $site->setGroup( $data['group'] );
-               $site->setLanguageCode( $data['language'] );
+               $site->setLanguageCode( $languageCode );
                $site->setSource( $data['source'] );
                $site->setExtraData( $data['data'] );
                $site->setExtraConfig( $data['config'] );
@@ -136,4 +146,13 @@ class FileBasedSiteLookup implements SiteLookup {
                return $site;
        }
 
+       /**
+        * Set the map used to replace a site's language code when Site objects are built
+        *
+        * @param string[] $newMapping Replacement language codes, keyed by the code to replace
+        */
+       public function setLanguageCodeMapping( array $newMapping ) {
+               $this->languageCodeMapping = $newMapping;
+       }
+
 }
index 32dd7f2..316fd89 100644 (file)
@@ -67,6 +67,20 @@ class DBSiteStoreTest extends MediaWikiTestCase {
                }
        }
 
+       /**
+        * @covers DBSiteStore::getSites
+        * @covers DBSiteStore::setLanguageCodeMapping
+        */
+       public function testLanguageCodeMapping() {
+               TestSites::insertIntoDb();
+
+               $store = $this->newDBSiteStore();
+               $store->setLanguageCodeMapping( [ 'no' => 'nb' ] );
+
+               $site = $store->getSite( 'nowiki' );
+               $this->assertEquals( 'nb', $site->getLanguageCode() );
+       }
+
        /**
         * @covers DBSiteStore::saveSites
         */
index 7984795..bebda79 100644 (file)
@@ -98,4 +98,15 @@ class FileBasedSiteLookupTest extends PHPUnit_Framework_TestCase {
                return tempnam( sys_get_temp_dir(), 'mw-test-sitelist' );
        }
 
+       public function testLanguageCodeMapping() {
+               $sites = $this->getSites();
+               $cacheBuilder = $this->newSitesCacheFileBuilder( $sites );
+               $cacheBuilder->build();
+
+               $cache = new FileBasedSiteLookup( $this->cacheFile );
+               $cache->setLanguageCodeMapping( [ 'en' => 'fa' ] );
+
+               $this->assertEquals( 'fa', $cache->getSite( 'enwiktionary' )->getLanguageCode() );
+       }
+
 }