Split some Language methods to LanguageNameUtils
author Aryeh Gregor <ayg@aryeh.name>
Thu, 2 May 2019 14:23:42 +0000 (17:23 +0300)
committer Ppchelko <ppchelko@wikimedia.org>
Tue, 8 Oct 2019 21:52:07 +0000 (21:52 +0000)
These are static methods that have to do with processing language names
and codes. I didn't include fallback behavior, because that would mean a
circular dependency with LocalisationCache.

In the new class, I renamed AS_AUTONYMS to AUTONYMS, and added a class
constant DEFINED for 'mw' to match the existing SUPPORTED and ALL. I
also renamed fetchLanguageName(s) to getLanguageName(s).

There is 100% test coverage for the code in the new class.

This was previously committed as 2e52f48c2ed and reverted because it
depended on e4468a1d6b6, which had to be reverted for performance
issues. There should be no changes other than rebasing.

Bug: T201405
Change-Id: Ifa346c8a92bf1eb57dc5e79458b32b7b26f1ee8a
(cherry picked from commit 6d80b6c0827401cf8e41589bf134147bb0aa407f)

17 files changed:
RELEASE-NOTES-1.34
autoload.php
includes/MediaWikiServices.php
includes/ServiceWiring.php
includes/cache/localisation/LocalisationCache.php
includes/language/LanguageCode.php
includes/language/LanguageNameUtils.php [new file with mode: 0644]
languages/Language.php
languages/data/Names.php
maintenance/rebuildLocalisationCache.php
tests/common/TestsAutoLoader.php
tests/phpunit/MediaWikiUnitTestCase.php
tests/phpunit/includes/api/ApiQuerySiteinfoTest.php
tests/phpunit/includes/cache/LocalisationCacheTest.php
tests/phpunit/languages/LanguageTest.php
tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php [new file with mode: 0644]
tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php [new file with mode: 0644]

index e98af0a..e808078 100644 (file)
@@ -607,6 +607,11 @@ because of Phabricator reports.
 * RESTBagOStuff users should specify either "JSON" or "PHP" serialization type.
 * Language::getLocalisationCache() is deprecated. Use MediaWikiServices
   instead.
+* The following Language methods are deprecated: isSupportedLanguage,
+  isValidCode, isValidBuiltInCode, isKnownLanguageTag, fetchLanguageNames,
+  fetchLanguageName, getFileName, getMessagesFileName, getJsonMessagesFileName.
+  Use the new LanguageNameUtils class instead. (Note that fetchLanguageName(s)
+  are called getLanguageName(s) in the new class.)
 
 === Other changes in 1.34 ===
 * Added option to specify "Various authors" as author in extension credits using
index dc57ff6..b2147a5 100644 (file)
@@ -881,6 +881,7 @@ $wgAutoloadLocalClasses = [
        'MediaWiki\\Languages\\Data\\CrhExceptions' => __DIR__ . '/languages/data/CrhExceptions.php',
        'MediaWiki\\Languages\\Data\\Names' => __DIR__ . '/languages/data/Names.php',
        'MediaWiki\\Languages\\Data\\ZhConversion' => __DIR__ . '/languages/data/ZhConversion.php',
+       'MediaWiki\\Languages\\LanguageNameUtils' => __DIR__ . '/includes/language/LanguageNameUtils.php',
        'MediaWiki\\Logger\\ConsoleLogger' => __DIR__ . '/includes/debug/logger/ConsoleLogger.php',
        'MediaWiki\\Logger\\ConsoleSpi' => __DIR__ . '/includes/debug/logger/ConsoleSpi.php',
        'MediaWiki\\Logger\\LegacyLogger' => __DIR__ . '/includes/debug/logger/LegacyLogger.php',
index a32fbef..3214e6a 100644 (file)
@@ -21,6 +21,7 @@ use MediaWiki\FileBackend\FSFile\TempFSFileFactory;
 use MediaWiki\FileBackend\LockManager\LockManagerGroupFactory;
 use MediaWiki\Http\HttpRequestFactory;
 use PasswordReset;
+use MediaWiki\Languages\LanguageNameUtils;
 use Wikimedia\Message\IMessageFormatterFactory;
 use MediaWiki\Page\MovePageFactory;
 use MediaWiki\Permissions\PermissionManager;
@@ -627,6 +628,14 @@ class MediaWikiServices extends ServiceContainer {
                return $this->getService( 'InterwikiLookup' );
        }
 
+       /**
+        * @since 1.34
+        * @return LanguageNameUtils
+        */
+       public function getLanguageNameUtils() {
+               return $this->getService( 'LanguageNameUtils' );
+       }
+
        /**
         * @since 1.28
         * @return LinkCache
index f6d0ec0..ed47b5b 100644 (file)
@@ -56,6 +56,7 @@ use MediaWiki\FileBackend\LockManager\LockManagerGroupFactory;
 use MediaWiki\Http\HttpRequestFactory;
 use MediaWiki\Interwiki\ClassicInterwikiLookup;
 use MediaWiki\Interwiki\InterwikiLookup;
+use MediaWiki\Languages\LanguageNameUtils;
 use MediaWiki\Linker\LinkRenderer;
 use MediaWiki\Linker\LinkRendererFactory;
 use MediaWiki\Logger\LoggerFactory;
@@ -259,6 +260,13 @@ return [
                );
        },
 
+       'LanguageNameUtils' => function ( MediaWikiServices $services ) : LanguageNameUtils {
+               return new LanguageNameUtils( new ServiceOptions(
+                       LanguageNameUtils::$constructorOptions,
+                       $services->getMainConfig()
+               ) );
+       },
+
        'LinkCache' => function ( MediaWikiServices $services ) : LinkCache {
                return new LinkCache(
                        $services->getTitleFormatter(),
@@ -313,7 +321,8 @@ return [
                        $logger,
                        [ function () use ( $services ) {
                                $services->getResourceLoader()->getMessageBlobStore()->clear();
-                       } ]
+                       } ],
+                       $services->getLanguageNameUtils()
                );
        },
 
index a9e6969..49b2a47 100644 (file)
@@ -23,6 +23,7 @@
 use CLDRPluralRuleParser\Evaluator;
 use CLDRPluralRuleParser\Error as CLDRPluralRuleError;
 use MediaWiki\Config\ServiceOptions;
+use MediaWiki\Languages\LanguageNameUtils;
 use Psr\Log\LoggerInterface;
 
 /**
@@ -73,6 +74,9 @@ class LocalisationCache {
        /** @var callable[] See comment for parameter in constructor */
        private $clearStoreCallbacks;
 
+       /** @var LanguageNameUtils */
+       private $langNameUtils;
+
        /**
         * A 2-d associative array, code/key, where presence indicates that the item
         * is loaded. Value arbitrary.
@@ -244,13 +248,15 @@ class LocalisationCache {
         * @param callable[] $clearStoreCallbacks To be called whenever the cache is cleared. Can be
         *   used to clear other caches that depend on this one, such as ResourceLoader's
         *   MessageBlobStore.
+        * @param LanguageNameUtils $langNameUtils
         * @throws MWException
         */
        function __construct(
                ServiceOptions $options,
                LCStore $store,
                LoggerInterface $logger,
-               array $clearStoreCallbacks = []
+               array $clearStoreCallbacks,
+               LanguageNameUtils $langNameUtils
        ) {
                $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
 
@@ -258,6 +264,7 @@ class LocalisationCache {
                $this->store = $store;
                $this->logger = $logger;
                $this->clearStoreCallbacks = $clearStoreCallbacks;
+               $this->langNameUtils = $langNameUtils;
 
                // Keep this separate from $this->options so it can be mutable
                $this->manualRecache = $options->get( 'manualRecache' );
@@ -470,7 +477,7 @@ class LocalisationCache {
                $this->initialisedLangs[$code] = true;
 
                # If the code is of the wrong form for a Messages*.php file, do a shallow fallback
-               if ( !Language::isValidBuiltInCode( $code ) ) {
+               if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) {
                        $this->initShallowFallback( $code, 'en' );
 
                        return;
@@ -478,7 +485,7 @@ class LocalisationCache {
 
                # Recache the data if necessary
                if ( !$this->manualRecache && $this->isExpired( $code ) ) {
-                       if ( Language::isSupportedLanguage( $code ) ) {
+                       if ( $this->langNameUtils->isSupportedLanguage( $code ) ) {
                                $this->recache( $code );
                        } elseif ( $code === 'en' ) {
                                throw new MWException( 'MessagesEn.php is missing.' );
@@ -707,7 +714,7 @@ class LocalisationCache {
                global $IP;
 
                // This reads in the PHP i18n file with non-messages l10n data
-               $fileName = Language::getMessagesFileName( $code );
+               $fileName = $this->langNameUtils->getMessagesFileName( $code );
                if ( !file_exists( $fileName ) ) {
                        $data = [];
                } else {
index 7d954d3..1d2f0b4 100644 (file)
@@ -21,7 +21,6 @@
 
 /**
  * Methods for dealing with language codes.
- * @todo Move some of the code-related static methods out of Language into this class
  *
  * @since 1.29
  * @ingroup Language
diff --git a/includes/language/LanguageNameUtils.php b/includes/language/LanguageNameUtils.php
new file mode 100644 (file)
index 0000000..08d9ab3
--- /dev/null
@@ -0,0 +1,319 @@
+<?php
+/**
+ * Internationalisation code.
+ * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * @defgroup Language Language
+ */
+
+namespace MediaWiki\Languages;
+
+use HashBagOStuff;
+use Hooks;
+use MediaWiki\Config\ServiceOptions;
+use MediaWikiTitleCodec;
+use MWException;
+use Wikimedia\Assert\Assert;
+
+/**
+ * @ingroup Language
+ *
+ * A service that provides utilities to do with language names and codes.
+ *
+ * @since 1.34
+ */
+class LanguageNameUtils {
+       /**
+        * Return autonyms in getLanguageName(s).
+        */
+       const AUTONYMS = null;
+
+       /**
+        * Return all known languages in getLanguageName(s).
+        */
+       const ALL = 'all';
+
+       /**
+        * Return in getLanguageName(s) only the languages that are defined by MediaWiki.
+        */
+       const DEFINED = 'mw';
+
+       /**
+        * Return in getLanguageName(s) only the languages for which we have at least some localisation.
+        */
+       const SUPPORTED = 'mwfile';
+
+       /** @var ServiceOptions */
+       private $options;
+
+       /**
+        * Cache for language names
+        * @var HashBagOStuff|null
+        */
+       private $languageNameCache;
+
+       /**
+        * Cache for validity of language codes
+        * @var array
+        */
+       private $validCodeCache = [];
+
+       public static $constructorOptions = [
+               'ExtraLanguageNames',
+               'UsePigLatinVariant',
+       ];
+
+       /**
+        * @param ServiceOptions $options
+        */
+       public function __construct( ServiceOptions $options ) {
+               $options->assertRequiredOptions( self::$constructorOptions );
+               $this->options = $options;
+       }
+
+       /**
+        * Checks whether any localisation is available for that language tag in MediaWiki
+        * (MessagesXx.php or xx.json exists).
+        *
+        * @param string $code Language tag (in lower case)
+        * @return bool Whether language is supported
+        */
+       public function isSupportedLanguage( $code ) {
+               if ( !$this->isValidBuiltInCode( $code ) ) {
+                       return false;
+               }
+
+               if ( $code === 'qqq' ) {
+                       // Special code for internal use, not supported even though there is a qqq.json
+                       return false;
+               }
+
+               return is_readable( $this->getMessagesFileName( $code ) ) ||
+                       is_readable( $this->getJsonMessagesFileName( $code ) );
+       }
+
+       /**
+        * Returns true if a language code string is of a valid form, whether or not it exists. This
+        * includes codes which are used solely for customisation via the MediaWiki namespace.
+        *
+        * @param string $code
+        *
+        * @return bool
+        */
+       public function isValidCode( $code ) {
+               Assert::parameterType( 'string', $code, '$code' );
+               if ( !isset( $this->validCodeCache[$code] ) ) {
+                       // People think language codes are HTML-safe, so enforce it.  Ideally we should only
+                       // allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks.  See bugs
+                       // T39564, T39587, T38938.
+                       $this->validCodeCache[$code] =
+                               // Protect against path traversal
+                               strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) &&
+                               !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );
+               }
+               return $this->validCodeCache[$code];
+       }
+
+       /**
+        * Returns true if a language code is of a valid form for the purposes of internal customisation
+        * of MediaWiki, via Messages*.php or *.json.
+        *
+        * @param string $code
+        * @return bool
+        */
+       public function isValidBuiltInCode( $code ) {
+               Assert::parameterType( 'string', $code, '$code' );
+
+               return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code );
+       }
+
+       /**
+        * Returns true if a language code is an IETF tag known to MediaWiki.
+        *
+        * @param string $tag
+        *
+        * @return bool
+        */
+       public function isKnownLanguageTag( $tag ) {
+               // Quick escape for invalid input to avoid exceptions down the line when code tries to
+               // process tags which are not valid at all.
+               if ( !$this->isValidBuiltInCode( $tag ) ) {
+                       return false;
+               }
+
+               if ( isset( Data\Names::$names[$tag] ) || $this->getLanguageName( $tag, $tag ) !== '' ) {
+                       return true;
+               }
+
+               return false;
+       }
+
+       /**
+        * Get an array of language names, indexed by code.
+        * @param null|string $inLanguage Code of language in which to return the names
+        *   Use self::AUTONYMS for autonyms (native names)
+        * @param string $include One of:
+        *   self::ALL all available languages
+        *   self::DEFINED only if the language is defined in MediaWiki or wgExtraLanguageNames
+        *     (default)
+        *   self::SUPPORTED only if the language is in self::DEFINED *and* has a message file
+        * @return array Language code => language name (sorted by key)
+        */
+       public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) {
+               $cacheKey = $inLanguage === self::AUTONYMS ? 'null' : $inLanguage;
+               $cacheKey .= ":$include";
+               if ( !$this->languageNameCache ) {
+                       $this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
+               }
+
+               $ret = $this->languageNameCache->get( $cacheKey );
+               if ( !$ret ) {
+                       $ret = $this->getLanguageNamesUncached( $inLanguage, $include );
+                       $this->languageNameCache->set( $cacheKey, $ret );
+               }
+               return $ret;
+       }
+
+       /**
+        * Uncached helper for getLanguageNames
+        * @param null|string $inLanguage As getLanguageNames
+        * @param string $include As getLanguageNames
+        * @return array Language code => language name (sorted by key)
+        */
+       private function getLanguageNamesUncached( $inLanguage, $include ) {
+               // If passed an invalid language code to use, fallback to en
+               if ( $inLanguage !== self::AUTONYMS && !$this->isValidCode( $inLanguage ) ) {
+                       $inLanguage = 'en';
+               }
+
+               $names = [];
+
+               if ( $inLanguage !== self::AUTONYMS ) {
+                       # TODO: also include for self::AUTONYMS, when this code is more efficient
+                       Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
+               }
+
+               $mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names;
+               if ( $this->options->get( 'UsePigLatinVariant' ) ) {
+                       // Pig Latin (for variant development)
+                       $mwNames['en-x-piglatin'] = 'Igpay Atinlay';
+               }
+
+               foreach ( $mwNames as $mwCode => $mwName ) {
+                       # - Prefer own MediaWiki native name when not using the hook
+                       # - For other names just add if not added through the hook
+                       if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
+                               $names[$mwCode] = $mwName;
+                       }
+               }
+
+               if ( $include === self::ALL ) {
+                       ksort( $names );
+                       return $names;
+               }
+
+               $returnMw = [];
+               $coreCodes = array_keys( $mwNames );
+               foreach ( $coreCodes as $coreCode ) {
+                       $returnMw[$coreCode] = $names[$coreCode];
+               }
+
+               if ( $include === self::SUPPORTED ) {
+                       $namesMwFile = [];
+                       # We do this using a foreach over the codes instead of a directory loop so that messages
+                       # files in extensions will work correctly.
+                       foreach ( $returnMw as $code => $value ) {
+                               if ( is_readable( $this->getMessagesFileName( $code ) ) ||
+                                       is_readable( $this->getJsonMessagesFileName( $code ) )
+                               ) {
+                                       $namesMwFile[$code] = $names[$code];
+                               }
+                       }
+
+                       ksort( $namesMwFile );
+                       return $namesMwFile;
+               }
+
+               ksort( $returnMw );
+               # self::DEFINED option; default if it's not one of the other two options
+               # (self::ALL/self::SUPPORTED)
+               return $returnMw;
+       }
+
+       /**
+        * @param string $code The code of the language for which to get the name
+        * @param null|string $inLanguage Code of language in which to return the name (self::AUTONYMS
+        *   for autonyms)
+        * @param string $include See getLanguageNames(), except this defaults to self::ALL instead of
+        *   self::DEFINED
+        * @return string Language name or empty
+        * @since 1.20
+        */
+       public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
+               $code = strtolower( $code );
+               $array = $this->getLanguageNames( $inLanguage, $include );
+               return $array[$code] ?? '';
+       }
+
+       /**
+        * Get the name of a file for a certain language code
+        * @param string $prefix Prepend this to the filename
+        * @param string $code Language code
+        * @param string $suffix Append this to the filename
+        * @throws MWException
+        * @return string $prefix . $mangledCode . $suffix
+        */
+       public function getFileName( $prefix, $code, $suffix = '.php' ) {
+               if ( !$this->isValidBuiltInCode( $code ) ) {
+                       throw new MWException( "Invalid language code \"$code\"" );
+               }
+
+               return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
+       }
+
+       /**
+        * @param string $code
+        * @return string
+        */
+       public function getMessagesFileName( $code ) {
+               global $IP;
+               $file = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' );
+               Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] );
+               return $file;
+       }
+
+       /**
+        * @param string $code
+        * @return string
+        * @throws MWException
+        */
+       public function getJsonMessagesFileName( $code ) {
+               global $IP;
+
+               if ( !$this->isValidBuiltInCode( $code ) ) {
+                       throw new MWException( "Invalid language code \"$code\"" );
+               }
+
+               return "$IP/languages/i18n/$code.json";
+       }
+}
index a8950f5..39b5f73 100644 (file)
@@ -27,8 +27,8 @@
  */
 
 use CLDRPluralRuleParser\Evaluator;
+use MediaWiki\Languages\LanguageNameUtils;
 use MediaWiki\MediaWikiServices;
-use Wikimedia\Assert\Assert;
 
 /**
  * Internationalisation code
@@ -38,21 +38,24 @@ class Language {
        /**
         * Return autonyms in fetchLanguageName(s).
         * @since 1.32
+        * @deprecated since 1.34, use LanguageNameUtils::AUTONYMS
         */
-       const AS_AUTONYMS = null;
+       const AS_AUTONYMS = LanguageNameUtils::AUTONYMS;
 
        /**
         * Return all known languages in fetchLanguageName(s).
         * @since 1.32
+        * @deprecated since 1.34, use LanguageNameUtils::ALL
         */
-       const ALL = 'all';
+       const ALL = LanguageNameUtils::ALL;
 
        /**
         * Return in fetchLanguageName(s) only the languages for which we have at
         * least some localisation.
         * @since 1.32
+        * @deprecated since 1.34, use LanguageNameUtils::SUPPORTED
         */
-       const SUPPORTED = 'mwfile';
+       const SUPPORTED = LanguageNameUtils::SUPPORTED;
 
        /**
         * @var LanguageConverter|FakeConverter
@@ -80,6 +83,9 @@ class Language {
        /** @var LocalisationCache */
        private $localisationCache;
 
+       /** @var LanguageNameUtils */
+       private $langNameUtils;
+
        public static $mLangObjCache = [];
 
        /**
@@ -94,6 +100,7 @@ class Language {
         */
        const STRICT_FALLBACKS = 1;
 
+       // TODO Make these const once we drop HHVM support (T192166)
        public static $mWeekdayMsgs = [
                'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
                'friday', 'saturday'
@@ -178,12 +185,6 @@ class Language {
         */
        private static $grammarTransformations;
 
-       /**
-        * Cache for language names
-        * @var HashBagOStuff|null
-        */
-       private static $languageNameCache;
-
        /**
         * Unicode directional formatting characters, for embedBidi()
         */
@@ -239,11 +240,12 @@ class Language {
         * @return Language
         */
        protected static function newFromCode( $code, $fallback = false ) {
-               if ( !self::isValidCode( $code ) ) {
+               $langNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils();
+               if ( !$langNameUtils->isValidCode( $code ) ) {
                        throw new MWException( "Invalid language code \"$code\"" );
                }
 
-               if ( !self::isValidBuiltInCode( $code ) ) {
+               if ( !$langNameUtils->isValidBuiltInCode( $code ) ) {
                        // It's not possible to customise this code with class files, so
                        // just return a Language object. This is to support uselang= hacks.
                        $lang = new Language;
@@ -262,7 +264,7 @@ class Language {
                // Keep trying the fallback list until we find an existing class
                $fallbacks = self::getFallbacksFor( $code );
                foreach ( $fallbacks as $fallbackCode ) {
-                       if ( !self::isValidBuiltInCode( $fallbackCode ) ) {
+                       if ( !$langNameUtils->isValidBuiltInCode( $fallbackCode ) ) {
                                throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
                        }
 
@@ -288,32 +290,25 @@ class Language {
                }
                if ( defined( 'MW_PHPUNIT_TEST' ) ) {
                        MediaWikiServices::getInstance()->resetServiceForTesting( 'LocalisationCache' );
+                       MediaWikiServices::getInstance()->resetServiceForTesting( 'LanguageNameUtils' );
                }
                self::$mLangObjCache = [];
                self::$fallbackLanguageCache = [];
                self::$grammarTransformations = null;
-               self::$languageNameCache = null;
        }
 
        /**
         * Checks whether any localisation is available for that language tag
         * in MediaWiki (MessagesXx.php exists).
         *
+        * @deprecated since 1.34, use LanguageNameUtils::isSupportedLanguage
         * @param string $code Language tag (in lower case)
         * @return bool Whether language is supported
         * @since 1.21
         */
        public static function isSupportedLanguage( $code ) {
-               if ( !self::isValidBuiltInCode( $code ) ) {
-                       return false;
-               }
-
-               if ( $code === 'qqq' ) {
-                       return false;
-               }
-
-               return is_readable( self::getMessagesFileName( $code ) ) ||
-                       is_readable( self::getJsonMessagesFileName( $code ) );
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->isSupportedLanguage( $code );
        }
 
        /**
@@ -381,63 +376,45 @@ class Language {
         * not it exists. This includes codes which are used solely for
         * customisation via the MediaWiki namespace.
         *
+        * @deprecated since 1.34, use LanguageNameUtils::isValidCode
+        *
         * @param string $code
         *
         * @return bool
         */
        public static function isValidCode( $code ) {
-               static $cache = [];
-               Assert::parameterType( 'string', $code, '$code' );
-               if ( !isset( $cache[$code] ) ) {
-                       // People think language codes are html safe, so enforce it.
-                       // Ideally we should only allow a-zA-Z0-9-
-                       // but, .+ and other chars are often used for {{int:}} hacks
-                       // see bugs T39564, T39587, T38938
-                       $cache[$code] =
-                               // Protect against path traversal
-                               strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code )
-                               && !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );
-               }
-               return $cache[$code];
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()->isValidCode( $code );
        }
 
        /**
         * Returns true if a language code is of a valid form for the purposes of
         * internal customisation of MediaWiki, via Messages*.php or *.json.
         *
+        * @deprecated since 1.34, use LanguageNameUtils::isValidBuiltInCode
+        *
         * @param string $code
         *
         * @since 1.18
         * @return bool
         */
        public static function isValidBuiltInCode( $code ) {
-               Assert::parameterType( 'string', $code, '$code' );
-
-               return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code );
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->isValidBuiltInCode( $code );
        }
 
        /**
         * Returns true if a language code is an IETF tag known to MediaWiki.
         *
+        * @deprecated since 1.34, use LanguageNameUtils::isKnownLanguageTag
+        *
         * @param string $tag
         *
         * @since 1.21
         * @return bool
         */
        public static function isKnownLanguageTag( $tag ) {
-               // Quick escape for invalid input to avoid exceptions down the line
-               // when code tries to process tags which are not valid at all.
-               if ( !self::isValidBuiltInCode( $tag ) ) {
-                       return false;
-               }
-
-               if ( isset( MediaWiki\Languages\Data\Names::$names[$tag] )
-                       || self::fetchLanguageName( $tag, $tag ) !== ''
-               ) {
-                       return true;
-               }
-
-               return false;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->isKnownLanguageTag( $tag );
        }
 
        /**
@@ -458,7 +435,9 @@ class Language {
                } else {
                        $this->mCode = str_replace( '_', '-', strtolower( substr( static::class, 8 ) ) );
                }
-               $this->localisationCache = MediaWikiServices::getInstance()->getLocalisationCache();
+               $services = MediaWikiServices::getInstance();
+               $this->localisationCache = $services->getLocalisationCache();
+               $this->langNameUtils = $services->getLanguageNameUtils();
        }
 
        /**
@@ -764,7 +743,7 @@ class Language {
                if ( $usemsg && wfMessage( $msg )->exists() ) {
                        return $this->getMessageFromDB( $msg );
                }
-               $name = self::fetchLanguageName( $code );
+               $name = $this->langNameUtils->getLanguageName( $code );
                if ( $name ) {
                        return $name; # if it's defined as a language name, show that
                } else {
@@ -825,6 +804,8 @@ class Language {
 
        /**
         * Get an array of language names, indexed by code.
+        *
+        * @deprecated since 1.34, use LanguageNameUtils::getLanguageNames
         * @param null|string $inLanguage Code of language in which to return the names
         *              Use self::AS_AUTONYMS for autonyms (native names)
         * @param string $include One of:
@@ -835,95 +816,12 @@ class Language {
         * @since 1.20
         */
        public static function fetchLanguageNames( $inLanguage = self::AS_AUTONYMS, $include = 'mw' ) {
-               $cacheKey = $inLanguage === self::AS_AUTONYMS ? 'null' : $inLanguage;
-               $cacheKey .= ":$include";
-               if ( self::$languageNameCache === null ) {
-                       self::$languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
-               }
-
-               $ret = self::$languageNameCache->get( $cacheKey );
-               if ( !$ret ) {
-                       $ret = self::fetchLanguageNamesUncached( $inLanguage, $include );
-                       self::$languageNameCache->set( $cacheKey, $ret );
-               }
-               return $ret;
-       }
-
-       /**
-        * Uncached helper for fetchLanguageNames
-        * @param null|string $inLanguage Code of language in which to return the names
-        *              Use self::AS_AUTONYMS for autonyms (native names)
-        * @param string $include One of:
-        *              self::ALL all available languages
-        *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
-        *              self::SUPPORTED only if the language is in 'mw' *and* has a message file
-        * @return array Language code => language name (sorted by key)
-        */
-       private static function fetchLanguageNamesUncached(
-               $inLanguage = self::AS_AUTONYMS,
-               $include = 'mw'
-       ) {
-               global $wgExtraLanguageNames, $wgUsePigLatinVariant;
-
-               // If passed an invalid language code to use, fallback to en
-               if ( $inLanguage !== self::AS_AUTONYMS && !self::isValidCode( $inLanguage ) ) {
-                       $inLanguage = 'en';
-               }
-
-               $names = [];
-
-               if ( $inLanguage ) {
-                       # TODO: also include when $inLanguage is null, when this code is more efficient
-                       Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
-               }
-
-               $mwNames = $wgExtraLanguageNames + MediaWiki\Languages\Data\Names::$names;
-               if ( $wgUsePigLatinVariant ) {
-                       // Pig Latin (for variant development)
-                       $mwNames['en-x-piglatin'] = 'Igpay Atinlay';
-               }
-
-               foreach ( $mwNames as $mwCode => $mwName ) {
-                       # - Prefer own MediaWiki native name when not using the hook
-                       # - For other names just add if not added through the hook
-                       if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
-                               $names[$mwCode] = $mwName;
-                       }
-               }
-
-               if ( $include === self::ALL ) {
-                       ksort( $names );
-                       return $names;
-               }
-
-               $returnMw = [];
-               $coreCodes = array_keys( $mwNames );
-               foreach ( $coreCodes as $coreCode ) {
-                       $returnMw[$coreCode] = $names[$coreCode];
-               }
-
-               if ( $include === self::SUPPORTED ) {
-                       $namesMwFile = [];
-                       # We do this using a foreach over the codes instead of a directory
-                       # loop so that messages files in extensions will work correctly.
-                       foreach ( $returnMw as $code => $value ) {
-                               if ( is_readable( self::getMessagesFileName( $code ) )
-                                       || is_readable( self::getJsonMessagesFileName( $code ) )
-                               ) {
-                                       $namesMwFile[$code] = $names[$code];
-                               }
-                       }
-
-                       ksort( $namesMwFile );
-                       return $namesMwFile;
-               }
-
-               ksort( $returnMw );
-               # 'mw' option; default if it's not one of the other two options (all/mwfile)
-               return $returnMw;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getLanguageNames( $inLanguage, $include );
        }
 
        /**
+        * @deprecated since 1.34, use LanguageNameUtils::getLanguageName
         * @param string $code The code of the language for which to get the name
         * @param null|string $inLanguage Code of language in which to return the name
         *   (SELF::AS_AUTONYMS for autonyms)
@@ -936,9 +834,8 @@ class Language {
                $inLanguage = self::AS_AUTONYMS,
                $include = self::ALL
        ) {
-               $code = strtolower( $code );
-               $array = self::fetchLanguageNames( $inLanguage, $include );
-               return !array_key_exists( $code, $array ) ? '' : $array[$code];
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getLanguageName( $code, $inLanguage, $include );
        }
 
        /**
@@ -4444,6 +4341,8 @@ class Language {
 
        /**
         * Get the name of a file for a certain language code
+        *
+        * @deprecated since 1.34, use LanguageNameUtils::getFileName
         * @param string $prefix Prepend this to the filename
         * @param string $code Language code
         * @param string $suffix Append this to the filename
@@ -4451,38 +4350,30 @@ class Language {
         * @return string $prefix . $mangledCode . $suffix
         */
        public static function getFileName( $prefix, $code, $suffix = '.php' ) {
-               if ( !self::isValidBuiltInCode( $code ) ) {
-                       throw new MWException( "Invalid language code \"$code\"" );
-               }
-
-               return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getFileName( $prefix, $code, $suffix );
        }
 
        /**
+        * @deprecated since 1.34, use LanguageNameUtils::getMessagesFileName
         * @param string $code
         * @return string
         */
        public static function getMessagesFileName( $code ) {
-               global $IP;
-               $file = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
-               Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] );
-               return $file;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getMessagesFileName( $code );
        }
 
        /**
+        * @deprecated since 1.34, use LanguageNameUtils::getJsonMessagesFileName
         * @param string $code
         * @return string
         * @throws MWException
         * @since 1.23
         */
        public static function getJsonMessagesFileName( $code ) {
-               global $IP;
-
-               if ( !self::isValidBuiltInCode( $code ) ) {
-                       throw new MWException( "Invalid language code \"$code\"" );
-               }
-
-               return "$IP/languages/i18n/$code.json";
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getJsonMessagesFileName( $code );
        }
 
        /**
index 783a211..2ae38d5 100644 (file)
@@ -39,7 +39,7 @@ namespace MediaWiki\Languages\Data;
  * If you are adding support for such a language, add it also to
  * the relevant section in shared.css.
  *
- * Do not use this class directly. Use Language::fetchLanguageNames(), which
+ * Do not use this class directly. Use LanguageNameUtils::getLanguageNames(), which
  * includes support for the CLDR extension.
  *
  * @ingroup Language
index 07c5569..e5f799d 100644 (file)
@@ -97,7 +97,8 @@ class RebuildLocalisationCache extends Maintenance {
                        [ function () {
                                MediaWikiServices::getInstance()->getResourceLoader()
                                        ->getMessageBlobStore()->clear();
-                       } ]
+                       } ],
+                       MediaWikiServices::getInstance()->getLanguageNameUtils()
                );
 
                $allCodes = array_keys( Language::fetchLanguageNames( null, 'mwfile' ) );
index 1657e81..5968aed 100644 (file)
@@ -225,6 +225,7 @@ $wgAutoloadClasses += [
 
        # tests/phpunit/unit/includes/language
        'LanguageFallbackTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageFallbackTestTrait.php",
+       'LanguageNameUtilsTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php",
 
        # tests/phpunit/unit/includes/libs/filebackend/fsfile
        'TempFSFileTestTrait' => "$testDir/phpunit/unit/includes/libs/filebackend/fsfile/TempFSFileTestTrait.php",
index fda986c..c63056d 100644 (file)
@@ -53,7 +53,8 @@ abstract class MediaWikiUnitTestCase extends TestCase {
                        'wgAutoloadLocalClasses',
                        // Need for LoggerFactory. Default is NullSpi.
                        'wgMWLoggerDefaultSpi',
-                       'wgAutoloadAttemptLowercase'
+                       'wgAutoloadAttemptLowercase',
+                       'wgLegalTitleChars'
                ];
        }
 
index 7f5ee0c..d1e8e84 100644 (file)
@@ -191,6 +191,7 @@ class ApiQuerySiteinfoTest extends ApiTestCase {
                        'wgExtraInterlanguageLinkPrefixes' => [ 'self' ],
                        'wgExtraLanguageNames' => [ 'self' => 'Recursion' ],
                ] );
+               $this->resetServices();
 
                MessageCache::singleton()->enable();
 
index af1ff86..4dd819a 100644 (file)
@@ -1,6 +1,7 @@
 <?php
 
 use MediaWiki\Config\ServiceOptions;
+use MediaWiki\Languages\LanguageNameUtils;
 use Psr\Log\NullLogger;
 
 /**
@@ -24,6 +25,37 @@ class LocalisationCacheTest extends MediaWikiTestCase {
        protected function getMockLocalisationCache() {
                global $IP;
 
+               $mockLangNameUtils = $this->createMock( LanguageNameUtils::class );
+               $mockLangNameUtils->method( 'isValidBuiltInCode' )->will( $this->returnCallback(
+                       function ( $code ) {
+                               // Copy-paste of the real isValidBuiltInCode() check, but it's only one line
+                               return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code );
+                       }
+               ) );
+               $mockLangNameUtils->method( 'isSupportedLanguage' )->will( $this->returnCallback(
+                       function ( $code ) {
+                               return in_array( $code, [
+                                       'ar',
+                                       'arz',
+                                       'ba',
+                                       'de',
+                                       'en',
+                                       'ksh',
+                                       'ru',
+                               ] );
+                       }
+               ) );
+               $mockLangNameUtils->method( 'getMessagesFileName' )->will( $this->returnCallback(
+                       function ( $code ) {
+                               global $IP;
+                               $code = str_replace( '-', '_', ucfirst( $code ) );
+                               return "$IP/languages/messages/Messages$code.php";
+                       }
+               ) );
+               $mockLangNameUtils->expects( $this->never() )->method( $this->anythingBut(
+                       'isValidBuiltInCode', 'isSupportedLanguage', 'getMessagesFileName'
+               ) );
+
                $lc = $this->getMockBuilder( LocalisationCache::class )
                        ->setConstructorArgs( [
                                new ServiceOptions( LocalisationCache::CONSTRUCTOR_OPTIONS, [
@@ -33,7 +65,9 @@ class LocalisationCacheTest extends MediaWikiTestCase {
                                        'MessagesDirs' => [],
                                ] ),
                                new LCStoreDB( [] ),
-                               new NullLogger
+                               new NullLogger,
+                               [],
+                               $mockLangNameUtils
                        ] )
                        ->setMethods( [ 'getMessagesDirs' ] )
                        ->getMock();
index c443f20..68dfd37 100644 (file)
@@ -3,6 +3,24 @@
 use Wikimedia\TestingAccessWrapper;
 
 class LanguageTest extends LanguageClassesTestCase {
+       use LanguageNameUtilsTestTrait;
+
+       /** @var array Copy of $wgHooks from before we unset LanguageGetTranslatedLanguageNames */
+       private $origHooks;
+
+       public function setUp() {
+               global $wgHooks;
+
+               parent::setUp();
+
+               // Don't allow installed hooks to run, except if a test restores them via origHooks (needed
+               // for testIsKnownLanguageTag_cldr)
+               $this->origHooks = $wgHooks;
+               $newHooks = $wgHooks;
+               unset( $newHooks['LanguageGetTranslatedLanguageNames'] );
+               $this->setMwGlobals( 'wgHooks', $newHooks );
+       }
+
        /**
         * @covers Language::convertDoubleWidth
         * @covers Language::normalizeForSearch
@@ -510,84 +528,6 @@ class LanguageTest extends LanguageClassesTestCase {
                );
        }
 
-       /**
-        * Test Language::isValidBuiltInCode()
-        * @dataProvider provideLanguageCodes
-        * @covers Language::isValidBuiltInCode
-        */
-       public function testBuiltInCodeValidation( $code, $expected, $message = '' ) {
-               $this->assertEquals( $expected,
-                       (bool)Language::isValidBuiltInCode( $code ),
-                       "validating code $code $message"
-               );
-       }
-
-       public static function provideLanguageCodes() {
-               return [
-                       [ 'fr', true, 'Two letters, minor case' ],
-                       [ 'EN', false, 'Two letters, upper case' ],
-                       [ 'tyv', true, 'Three letters' ],
-                       [ 'be-tarask', true, 'With dash' ],
-                       [ 'be-x-old', true, 'With extension (two dashes)' ],
-                       [ 'be_tarask', false, 'Reject underscores' ],
-               ];
-       }
-
-       /**
-        * Test Language::isKnownLanguageTag()
-        * @dataProvider provideKnownLanguageTags
-        * @covers Language::isKnownLanguageTag
-        */
-       public function testKnownLanguageTag( $code, $message = '' ) {
-               $this->assertTrue(
-                       (bool)Language::isKnownLanguageTag( $code ),
-                       "validating code $code - $message"
-               );
-       }
-
-       public static function provideKnownLanguageTags() {
-               return [
-                       [ 'fr', 'simple code' ],
-                       [ 'bat-smg', 'an MW legacy tag' ],
-                       [ 'sgs', 'an internal standard MW name, for which a legacy tag is used externally' ],
-               ];
-       }
-
-       /**
-        * @covers Language::isKnownLanguageTag
-        */
-       public function testKnownCldrLanguageTag() {
-               if ( !class_exists( 'LanguageNames' ) ) {
-                       $this->markTestSkipped( 'The LanguageNames class is not available. '
-                               . 'The CLDR extension is probably not installed.' );
-               }
-
-               $this->assertTrue(
-                       (bool)Language::isKnownLanguageTag( 'pal' ),
-                       'validating code "pal" an ancient language, which probably will '
-                               . 'not appear in Names.php, but appears in CLDR in English'
-               );
-       }
-
-       /**
-        * Negative tests for Language::isKnownLanguageTag()
-        * @dataProvider provideUnKnownLanguageTags
-        * @covers Language::isKnownLanguageTag
-        */
-       public function testUnknownLanguageTag( $code, $message = '' ) {
-               $this->assertFalse(
-                       (bool)Language::isKnownLanguageTag( $code ),
-                       "checking that code $code is invalid - $message"
-               );
-       }
-
-       public static function provideUnknownLanguageTags() {
-               return [
-                       [ 'mw', 'non-existent two-letter code' ],
-                       [ 'foo"<bar', 'very invalid language code' ],
-               ];
-       }
-
        /**
         * Test too short timestamp
         * @expectedException MWException
@@ -1824,33 +1764,11 @@ class LanguageTest extends LanguageClassesTestCase {
                $lang->getGrammarTransformations();
                $this->assertNotNull( $languageClass->grammarTransformations );
 
-               // Populate $languageNameCache
-               Language::fetchLanguageNames();
-               $this->assertNotNull( $languageClass->languageNameCache );
-
                Language::clearCaches();
 
                $this->assertCount( 0, Language::$mLangObjCache );
                $this->assertCount( 0, $languageClass->fallbackLanguageCache );
                $this->assertNull( $languageClass->grammarTransformations );
-               $this->assertNull( $languageClass->languageNameCache );
-       }
-
-       /**
-        * @dataProvider provideIsSupportedLanguage
-        * @covers Language::isSupportedLanguage
-        */
-       public function testIsSupportedLanguage( $code, $expected, $comment ) {
-               $this->assertEquals( $expected, Language::isSupportedLanguage( $code ), $comment );
-       }
-
-       public static function provideIsSupportedLanguage() {
-               return [
-                       [ 'en', true, 'is supported language' ],
-                       [ 'fi', true, 'is supported language' ],
-                       [ 'bunny', false, 'is not supported language' ],
-                       [ 'FI', false, 'is not supported language, input should be in lower case' ],
-               ];
        }
 
        /**
@@ -1956,4 +1874,82 @@ class LanguageTest extends LanguageClassesTestCase {
                        [ 'èl', 'Ll' , 'Non-ASCII is overridden', [ 'è' => 'L' ] ],
                ];
        }
+
+       // The following methods are for LanguageNameUtilsTestTrait
+
+       private function isSupportedLanguage( $code ) {
+               return Language::isSupportedLanguage( $code );
+       }
+
+       private function isValidCode( $code ) {
+               return Language::isValidCode( $code );
+       }
+
+       private function isValidBuiltInCode( $code ) {
+               return Language::isValidBuiltInCode( $code );
+       }
+
+       private function isKnownLanguageTag( $code ) {
+               return Language::isKnownLanguageTag( $code );
+       }
+
+       /**
+        * Call getLanguageName() and getLanguageNames() using the Language static methods.
+        *
+        * @param array $options To set globals for testing Language
+        * @param string $expected
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) {
+               if ( $options ) {
+                       foreach ( $options as $key => $val ) {
+                               $this->setMwGlobals( "wg$key", $val );
+                       }
+                       $this->resetServices();
+               }
+               $this->assertSame( $expected,
+                       Language::fetchLanguageNames( ...$otherArgs )[strtolower( $code )] ?? '' );
+               $this->assertSame( $expected, Language::fetchLanguageName( $code, ...$otherArgs ) );
+       }
+
+       private function getLanguageNames( ...$args ) {
+               return Language::fetchLanguageNames( ...$args );
+       }
+
+       private function getLanguageName( ...$args ) {
+               return Language::fetchLanguageName( ...$args );
+       }
+
+       private static function getFileName( ...$args ) {
+               return Language::getFileName( ...$args );
+       }
+
+       private static function getMessagesFileName( $code ) {
+               return Language::getMessagesFileName( $code );
+       }
+
+       private static function getJsonMessagesFileName( $code ) {
+               return Language::getJsonMessagesFileName( $code );
+       }
+
+       /**
+        * @todo This really belongs in the cldr extension's tests.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag
+        * @covers Language::isKnownLanguageTag
+        */
+       public function testIsKnownLanguageTag_cldr() {
+               if ( !class_exists( 'LanguageNames' ) ) {
+                       $this->markTestSkipped( 'The LanguageNames class is not available. '
+                               . 'The CLDR extension is probably not installed.' );
+               }
+
+               // We need to restore the extension's hook that we removed.
+               $this->setMwGlobals( 'wgHooks', $this->origHooks );
+
+               // "pal" is an ancient language, which probably will not appear in Names.php, but appears in
+               // CLDR in English
+               $this->assertTrue( Language::isKnownLanguageTag( 'pal' ) );
+       }
 }
diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php
new file mode 100644 (file)
index 0000000..6fbd4a2
--- /dev/null
@@ -0,0 +1,66 @@
+<?php
+
+use MediaWiki\Config\ServiceOptions;
+use MediaWiki\Languages\LanguageNameUtils;
+
+class LanguageNameUtilsTest extends MediaWikiUnitTestCase {
+       /**
+        * @param array $optionsArray
+        */
+       private static function newObj( array $optionsArray = [] ) : LanguageNameUtils {
+               return new LanguageNameUtils( new ServiceOptions(
+                       LanguageNameUtils::$constructorOptions,
+                       $optionsArray,
+                       [
+                               'ExtraLanguageNames' => [],
+                               'LanguageCode' => 'en',
+                               'UsePigLatinVariant' => false,
+                       ]
+               ) );
+       }
+
+       use LanguageNameUtilsTestTrait;
+
+       private function isSupportedLanguage( $code ) {
+               return $this->newObj()->isSupportedLanguage( $code );
+       }
+
+       private function isValidCode( $code ) {
+               return $this->newObj()->isValidCode( $code );
+       }
+
+       private function isValidBuiltInCode( $code ) {
+               return $this->newObj()->isValidBuiltInCode( $code );
+       }
+
+       private function isKnownLanguageTag( $code ) {
+               return $this->newObj()->isKnownLanguageTag( $code );
+       }
+
+       private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) {
+               $this->assertSame( $expected, $this->newObj( $options )
+                       ->getLanguageNames( ...$otherArgs )[strtolower( $code )] ?? '' );
+               $this->assertSame( $expected,
+                       $this->newObj( $options )->getLanguageName( $code, ...$otherArgs ) );
+       }
+
+       private function getLanguageNames( ...$args ) {
+               return $this->newObj()->getLanguageNames( ...$args );
+       }
+
+       private function getLanguageName( ...$args ) {
+               return $this->newObj()->getLanguageName( ...$args );
+       }
+
+       private static function getFileName( ...$args ) {
+               return self::newObj()->getFileName( ...$args );
+       }
+
+       private static function getMessagesFileName( $code ) {
+               return self::newObj()->getMessagesFileName( $code );
+       }
+
+       private static function getJsonMessagesFileName( $code ) {
+               return self::newObj()->getJsonMessagesFileName( $code );
+       }
+}
diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php
new file mode 100644 (file)
index 0000000..bd777e9
--- /dev/null
@@ -0,0 +1,555 @@
+<?php
+
+use MediaWiki\Languages\LanguageNameUtils;
+
+const AUTONYMS = LanguageNameUtils::AUTONYMS;
+const ALL = LanguageNameUtils::ALL;
+const DEFINED = LanguageNameUtils::DEFINED;
+const SUPPORTED = LanguageNameUtils::SUPPORTED;
+
+/**
+ * For code shared between LanguageNameUtilsTest and LanguageTest.
+ */
+trait LanguageNameUtilsTestTrait {
+       abstract protected function isSupportedLanguage( $code );
+
+       /**
+        * @dataProvider provideIsSupportedLanguage
+        * @covers MediaWiki\Languages\LanguageNameUtils::__construct
+        * @covers MediaWiki\Languages\LanguageNameUtils::isSupportedLanguage
+        * @covers Language::isSupportedLanguage
+        */
+       public function testIsSupportedLanguage( $code, $expected ) {
+               $this->assertSame( $expected, $this->isSupportedLanguage( $code ) );
+       }
+
+       public static function provideIsSupportedLanguage() {
+               return [
+                       'en' => [ 'en', true ],
+                       'fi' => [ 'fi', true ],
+                       'bunny' => [ 'bunny', false ],
+                       'qqq' => [ 'qqq', false ],
+                       'uppercase is not considered supported' => [ 'FI', false ],
+               ];
+       }
+
+       abstract protected function isValidCode( $code );
+
+       /**
+        * We don't test that the result is cached, because that should only be noticeable if the
+        * configuration changes in between calls, and 1) that should never happen in normal operation,
+        * 2) if you do it you deserve whatever you get, and 3) once the static Language method is
+        * dropped and the invalid title regex is moved to something injected instead of a static call,
+        * the cache will be undetectable.
+        *
+        * @todo Should we test changes to $wgLegalTitleChars here? Does anybody actually change that?
+        * Is it possible to change it usefully without breaking everything?
+        *
+        * @dataProvider provideIsValidCode
+        * @covers MediaWiki\Languages\LanguageNameUtils::isValidCode
+        * @covers Language::isValidCode
+        *
+        * @param string $code
+        * @param bool $expected
+        */
+       public function testIsValidCode( $code, $expected ) {
+               $this->assertSame( $expected, $this->isValidCode( $code ) );
+       }
+
+       public static function provideIsValidCode() {
+               $ret = [
+                       'en' => [ 'en', true ],
+                       'en-GB' => [ 'en-GB', true ],
+                       'Funny chars' => [ "%!$()*,-.;=?@^_`~\x80\xA2\xFF+", true ],
+                       'Percent escape not allowed' => [ 'a%aF', false ],
+                       'Percent with only one following char is okay' => [ '%a', true ],
+                       'Percent with non-hex following chars is okay' => [ '%AG', true ],
+                       'Named char reference "a"' => [ 'a&a', false ],
+                       'Named char reference "A"' => [ 'a&A', false ],
+                       'Named char reference "0"' => [ 'a&0', false ],
+                       'Named char reference non-ASCII' => [ "a&\x92", false ],
+                       'Numeric char reference' => [ "a&#0", false ],
+                       'Hex char reference 0' => [ "a&#x0", false ],
+                       'Hex char reference A' => [ "a&#xA", false ],
+                       'Lone ampersand is valid for title but not lang code' => [ '&', false ],
+                       'Ampersand followed by just # is valid for title but not lang code' => [ '&#', false ],
+                       'Ampersand followed by # and non-x/digit is valid for title but not lang code' =>
+                               [ '&#a', false ],
+               ];
+               $disallowedChars = ":/\\\000&<>'\"";
+               foreach ( str_split( $disallowedChars ) as $char ) {
+                       $ret["Disallowed character $char"] = [ "a{$char}a", false ];
+               }
+               return $ret;
+       }
+
+       abstract protected function isValidBuiltInCode( $code );
+
+       /**
+        * @dataProvider provideIsValidBuiltInCode
+        * @covers MediaWiki\Languages\LanguageNameUtils::isValidBuiltInCode
+        * @covers Language::isValidBuiltInCode
+        *
+        * @param string $code
+        * @param bool $expected
+        */
+       public function testIsValidBuiltInCode( $code, $expected ) {
+               $this->assertSame( $expected, $this->isValidBuiltInCode( $code ) );
+       }
+
+       public static function provideIsValidBuiltInCode() {
+               return [
+                       'Two letters, lowercase' => [ 'fr', true ],
+                       'Two letters, uppercase' => [ 'EN', false ],
+                       'Three letters' => [ 'tyv', true ],
+                       'With dash' => [ 'be-tarask', true ],
+                       'With extension (two dashes)' => [ 'be-x-old', true ],
+                       'Reject underscores' => [ 'be_tarask', false ],
+                       'One letter' => [ 'a', false ],
+                       'Only digits' => [ '00', true ],
+                       'Only dashes' => [ '--', true ],
+                       'Unreasonably long' => [ str_repeat( 'x', 100 ), true ],
+                       'qqq' => [ 'qqq', true ],
+               ];
+       }
+
+       abstract protected function isKnownLanguageTag( $code );
+
+       /**
+        * @dataProvider provideIsKnownLanguageTag
+        * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag
+        * @covers Language::isKnownLanguageTag
+        *
+        * @param string $code
+        * @param bool $expected
+        */
+       public function testIsKnownLanguageTag( $code, $expected ) {
+               $this->assertSame( $expected, $this->isKnownLanguageTag( $code ) );
+       }
+
+       public static function provideIsKnownLanguageTag() {
+               $invalidBuiltInCodes = array_filter( static::provideIsValidBuiltInCode(),
+                       function ( $arr ) {
+                               // If isValidBuiltInCode() returns false, we want to also, but if it returns true,
+                               // we could still return false from isKnownLanguageTag(), so skip those.
+                               return !$arr[1];
+                       }
+               );
+               return array_merge( $invalidBuiltInCodes, [
+                       'Simple code' => [ 'fr', true ],
+                       'An MW legacy tag' => [ 'bat-smg', true ],
+                       'An internal standard MW name, for which a legacy tag is used externally' =>
+                               [ 'sgs', true ],
+                       'Non-existent two-letter code' => [ 'mw', false ],
+                       'Very invalid language code' => [ 'foo"<bar', false ],
+               ] );
+       }
+
+       abstract protected function assertGetLanguageNames(
+               array $options, $expected, $code, ...$otherArgs
+       );
+
+       abstract protected function getLanguageNames( ...$args );
+
+       abstract protected function getLanguageName( ...$args );
+
+       /**
+        * Check language name lookup with no hook installed and no extra options.
+        *
+        * @dataProvider provideGetLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames( $expected, $code, ...$otherArgs ) {
+               $noOptions = [];
+               $this->assertGetLanguageNames( $noOptions, $expected, $code, ...$otherArgs );
+       }
+
+       /**
+        * Cases for testGetLanguageNames(), each of the form
+        * [ expected name, language code, optional $inLanguage, optional $include ].
+        *
+        * @return array
+        */
+       public static function provideGetLanguageNames() {
+               // @todo There are probably lots of interesting tests to add here.
+               $cases = [];
+
+               $cases['Simple code'] = [ 'Deutsch', 'de' ];
+               $cases['Simple code in a different language (doesn\'t work without hook)'] =
+                       [ 'Deutsch', 'de', 'fr' ];
+               $cases['Invalid code'] = [ '', '&' ];
+               $cases['Pig Latin not enabled'] = [ '', 'en-x-piglatin', AUTONYMS, ALL ];
+               $cases['qqq doesn\'t have a name'] = [ '', 'qqq', AUTONYMS, ALL ];
+               $cases['An MW legacy tag is recognized'] = [ 'žemaitėška', 'bat-smg' ];
+               // @todo Is the next test's result desired?
+               $cases['An MW legacy tag is not supported'] = [ '', 'bat-smg', AUTONYMS, SUPPORTED ];
+               $cases['An internal standard name, for which a legacy tag is used externally, is supported'] =
+                       [ 'žemaitėška', 'sgs', AUTONYMS, SUPPORTED ];
+
+               return $cases;
+       }
+
+       /**
+        * Check language name lookup while a LanguageGetTranslatedLanguageNames hook supplies
+        * translated names for 'de', 'en' and 'fr'.
+        *
+        * @dataProvider provideGetLanguageNames_withHook
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected Expected return value of getLanguageName()
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames_withHook( $expected, $code, ...$otherArgs ) {
+               // Names the hook hands out, keyed by the language they are requested in.
+               $namesByLanguage = [
+                       'de' => [
+                               'de' => 'Deutsch',
+                               'en' => 'Englisch',
+                               'fr' => 'Französisch',
+                       ],
+                       'en' => [
+                               'de' => 'German',
+                               'en' => 'English',
+                               'fr' => 'French',
+                               'sqsqsqsq' => '!!?!',
+                               'bat-smg' => 'Samogitian',
+                       ],
+                       'fr' => [
+                               'de' => 'allemand',
+                               'en' => 'anglais',
+                               // Deliberate mistake (no cedilla)
+                               'fr' => 'francais',
+                       ],
+               ];
+
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names, $inLanguage ) use ( $namesByLanguage ) {
+                               // For any other language, leave $names exactly as it was passed in.
+                               if ( isset( $namesByLanguage[$inLanguage] ) ) {
+                                       $names = $namesByLanguage[$inLanguage];
+                               }
+                       }
+               );
+
+               // Really we could dispense with assertGetLanguageNames() and just call
+               // testGetLanguageNames() here, but it looks weird to call a test method from another test
+               // method.
+               $noOptions = [];
+               $this->assertGetLanguageNames( $noOptions, $expected, $code, ...$otherArgs );
+       }
+
+       /**
+        * Cases for testGetLanguageNames_withHook(), each of the form
+        * [ expected name, language code, optional $inLanguage, optional $include ].
+        *
+        * @return array
+        */
+       public static function provideGetLanguageNames_withHook() {
+               $cases = [];
+
+               $cases['Simple code in a different language'] = [ 'allemand', 'de', 'fr' ];
+               $cases['Invalid inLanguage defaults to English'] = [ 'German', 'de', '&' ];
+               $cases['If inLanguage not provided, default to autonym'] = [ 'Deutsch', 'de' ];
+               $cases['Hooks ignored for explicitly-requested autonym'] = [ 'français', 'fr', 'fr' ];
+               $cases['Hooks don\'t make a language supported'] = [ '', 'bat-smg', 'en', SUPPORTED ];
+               $cases['Hooks don\'t make a language defined'] = [ '', 'sqsqsqsq', 'en', DEFINED ];
+               $cases['Hooks do make a language name returned with ALL'] =
+                       [ '!!?!', 'sqsqsqsq', 'en', ALL ];
+
+               return $cases;
+       }
+
+       /**
+        * Check how the 'ExtraLanguageNames' option interacts with a hook that also supplies a
+        * name for 'de'.
+        *
+        * @dataProvider provideGetLanguageNames_ExtraLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected Expected return value of getLanguageName()
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames_ExtraLanguageNames( $expected, $code, ...$otherArgs ) {
+               $extraLanguageNames = [ 'de' => 'deutsche Sprache', 'sqsqsqsq' => '!!?!' ];
+
+               // The hook offers a competing name for 'de', so the cases can tell which source wins.
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names ) {
+                               $names['de'] = 'die deutsche Sprache';
+                       }
+               );
+
+               $options = [ 'ExtraLanguageNames' => $extraLanguageNames ];
+               $this->assertGetLanguageNames( $options, $expected, $code, ...$otherArgs );
+       }
+
+       /**
+        * Cases for testGetLanguageNames_ExtraLanguageNames(), each of the form
+        * [ expected name, language code, optional $inLanguage, optional $include ].
+        *
+        * @return array
+        */
+       public static function provideGetLanguageNames_ExtraLanguageNames() {
+               $cases = [];
+
+               $cases['Simple extra language name'] = [ '!!?!', 'sqsqsqsq' ];
+               $cases['Extra language is defined'] = [ '!!?!', 'sqsqsqsq', AUTONYMS, DEFINED ];
+               $cases['Extra language is not supported'] = [ '', 'sqsqsqsq', AUTONYMS, SUPPORTED ];
+               $cases['Extra language overrides default'] = [ 'deutsche Sprache', 'de' ];
+               $cases['Extra language overrides hook for explicitly requested autonym'] =
+                       [ 'deutsche Sprache', 'de', 'de' ];
+               $cases['Hook overrides extra language for non-autonym'] =
+                       [ 'die deutsche Sprache', 'de', 'fr' ];
+
+               return $cases;
+       }
+
+       /**
+        * Test that getLanguageNames() defaults to DEFINED, and getLanguageName() defaults to ALL.
+        *
+        * The hook injects a language that MediaWiki itself does not define. It must be absent
+        * from the getLanguageNames() result and present in the getLanguageName() result when
+        * $include is left at its default.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        */
+       public function testGetLanguageNames_parameterDefault() {
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names ) {
+                               $names = [ 'sqsqsqsq' => '!!?!' ];
+                       }
+               );
+
+               // We use 'en' here because the hook is not run if we're requesting autonyms, although in
+               // this case (language that isn't defined by MediaWiki itself) that behavior seems wrong.
+               // 'en' must be an argument of getLanguageNames(): as a third argument of
+               // assertArrayNotHasKey() it would only be the failure message, autonyms would be
+               // requested, and the assertion would pass without the hook ever running.
+               $this->assertArrayNotHasKey( 'sqsqsqsq', $this->getLanguageNames( 'en' ) );
+
+               $this->assertSame( '!!?!', $this->getLanguageName( 'sqsqsqsq', 'en' ) );
+       }
+
+       /**
+        * Check that getLanguageNames() returns its result already sorted by key.
+        *
+        * @dataProvider provideGetLanguageNames_sorted
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers Language::fetchLanguageNames
+        *
+        * @param mixed ...$args To pass to method
+        */
+       public function testGetLanguageNames_sorted( ...$args ) {
+               $actual = $this->getLanguageNames( ...$args );
+
+               // Sort a copy by key. assertSame() on arrays also compares element order, so it only
+               // passes if the result was in key order to begin with.
+               $expected = $actual;
+               ksort( $expected );
+
+               $this->assertSame( $expected, $actual );
+       }
+
+       /**
+        * Argument lists for testGetLanguageNames_sorted(), each of the form
+        * [ optional $inLanguage, optional $include ].
+        *
+        * @return array
+        */
+       public static function provideGetLanguageNames_sorted() {
+               // Use the DEFINED constant rather than the bare 'mw' string, in line with the ALL and
+               // SUPPORTED constants used here and in the other providers.
+               return [
+                       [],
+                       [ AUTONYMS ],
+                       [ AUTONYMS, DEFINED ],
+                       [ AUTONYMS, ALL ],
+                       [ AUTONYMS, SUPPORTED ],
+                       [ 'he', DEFINED ],
+                       [ 'he', ALL ],
+                       [ 'he', SUPPORTED ],
+               ];
+       }
+
+       /**
+        * Check that the LanguageGetTranslatedLanguageNames hook runs only when names are
+        * requested in a specific language, not when autonyms are requested.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers Language::fetchLanguageNames
+        */
+       public function testGetLanguageNames_hookNotCalledForAutonyms() {
+               $hookCalls = 0;
+               $countingHook = function () use ( &$hookCalls ) {
+                       ++$hookCalls;
+               };
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', $countingHook );
+
+               $this->getLanguageNames();
+               $this->assertSame( 0, $hookCalls, 'Hook must not be called for autonyms' );
+
+               // We test elsewhere that the hook works, but the following verifies that our test is
+               // working and the counter isn't left at zero above only because we're checking autonyms.
+               $this->getLanguageNames( 'fr' );
+               $this->assertSame( 1, $hookCalls, 'Hook must be called for non-autonyms' );
+       }
+
+       /**
+        * Check the name of 'en-x-piglatin' with the 'UsePigLatinVariant' option switched on and
+        * a hook that supplies names for it in 'fr' and in 'en-x-piglatin' itself.
+        *
+        * @dataProvider provideGetLanguageNames_pigLatin
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames_pigLatin( $expected, ...$otherArgs ) {
+               // Names the hook hands out, keyed by the language they are requested in.
+               $namesByLanguage = [
+                       'fr' => [ 'en-x-piglatin' => 'latin de cochons' ],
+                       // Deliberately lowercase
+                       'en-x-piglatin' => [ 'en-x-piglatin' => 'igpay atinlay' ],
+               ];
+
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names, $inLanguage ) use ( $namesByLanguage ) {
+                               // For any other language, leave $names exactly as it was passed in.
+                               if ( isset( $namesByLanguage[$inLanguage] ) ) {
+                                       $names = $namesByLanguage[$inLanguage];
+                               }
+                       }
+               );
+
+               $options = [ 'UsePigLatinVariant' => true ];
+               $this->assertGetLanguageNames( $options, $expected, 'en-x-piglatin', ...$otherArgs );
+       }
+
+       /**
+        * Cases for testGetLanguageNames_pigLatin(), each of the form
+        * [ expected name, optional $inLanguage, optional $include ]. The test supplies the
+        * language code itself.
+        *
+        * @return array
+        */
+       public static function provideGetLanguageNames_pigLatin() {
+               $cases = [];
+
+               $cases['Simple test'] = [ 'Igpay Atinlay' ];
+               $cases['Not supported'] = [ '', AUTONYMS, SUPPORTED ];
+               $cases['Foreign language'] = [ 'latin de cochons', 'fr' ];
+               // NOTE(review): the test takes no $code parameter, so the second 'en-x-piglatin'
+               // below ends up in the $include position. Confirm whether that is intended.
+               $cases['Hook doesn\'t override explicit autonym'] =
+                       [ 'Igpay Atinlay', 'en-x-piglatin', 'en-x-piglatin' ];
+
+               return $cases;
+       }
+
+       /**
+        * Just for the sake of completeness, test that ExtraLanguageNames will not override the name
+        * for pig Latin. Nobody actually cares about this and if anything current behavior is probably
+        * wrong, but once we're testing the whole file we may as well be comprehensive.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        */
+       public function testGetLanguageNames_pigLatinAndExtraLanguageNames() {
+               // The extra name is lowercase, so it can be told apart from the expected name.
+               $extraLanguageNames = [ 'en-x-piglatin' => 'igpay atinlay' ];
+               $options = [
+                       'UsePigLatinVariant' => true,
+                       'ExtraLanguageNames' => $extraLanguageNames
+               ];
+
+               $this->assertGetLanguageNames( $options, 'Igpay Atinlay', 'en-x-piglatin' );
+       }
+
+       /**
+        * Build a file name for a language code. Implemented by the class that uses this trait.
+        * The tests in this trait expect an MWException for an invalid code.
+        *
+        * @param mixed ...$args Prefix and language code, optionally followed by a suffix
+        * @return string
+        */
+       abstract protected static function getFileName( ...$args );
+
+       /**
+        * Check the file name built from a prefix, a language code and an optional suffix.
+        *
+        * @dataProvider provideGetFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getFileName
+        * @covers Language::getFileName
+        *
+        * @param string $expected
+        * @param mixed ...$args To pass to method
+        */
+       public function testGetFileName( $expected, ...$args ) {
+               // getFileName() is declared static, so call it as such.
+               $actual = static::getFileName( ...$args );
+               $this->assertSame( $expected, $actual );
+       }
+
+       /**
+        * Cases for testGetFileName(), each of the form
+        * [ expected file name, prefix, language code, optional suffix ].
+        *
+        * @return array
+        */
+       public static function provideGetFileName() {
+               $cases = [];
+
+               $cases['Simple case'] = [ 'MessagesXx.php', 'Messages', 'xx' ];
+               $cases['With extension'] = [ 'MessagesXx.ext', 'Messages', 'xx', '.ext' ];
+               $cases['Replacing dashes'] = [ '!__?', '!', '--', '?' ];
+               $cases['Empty prefix and extension'] = [ 'Xx', '', 'xx', '' ];
+               $cases['Uppercase only first letter'] = [ 'Messages_a.php', 'Messages', '-a' ];
+
+               return $cases;
+       }
+
+       /**
+        * Get the path of the Messages*.php file for a language code. Implemented by the class
+        * that uses this trait. The tests in this trait expect an MWException for an invalid code.
+        *
+        * @param string $code Language code
+        * @return string
+        */
+       abstract protected function getMessagesFileName( $code );
+
+       /**
+        * Check the messages file path produced for a language code when no hook is installed.
+        *
+        * @dataProvider provideGetMessagesFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
+        * @covers Language::getMessagesFileName
+        *
+        * @param string $code
+        * @param string $expected
+        */
+       public function testGetMessagesFileName( $code, $expected ) {
+               $actual = $this->getMessagesFileName( $code );
+               $this->assertSame( $expected, $actual );
+       }
+
+       /**
+        * Cases for testGetMessagesFileName(), each of the form [ language code, expected path ].
+        * The expected paths are built from the global $IP.
+        *
+        * @return array
+        */
+       public static function provideGetMessagesFileName() {
+               global $IP;
+
+               $cases = [];
+
+               $cases['Simple case'] = [ 'en', "$IP/languages/messages/MessagesEn.php" ];
+               $cases['Replacing dashes'] = [ '--', "$IP/languages/messages/Messages__.php" ];
+               $cases['Uppercase only first letter'] =
+                       [ '-a', "$IP/languages/messages/Messages_a.php" ];
+
+               return $cases;
+       }
+
+       /**
+        * Check that the Language::getMessagesFileName hook receives the code and the default
+        * path, that it can replace the path, and that it runs exactly once.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
+        * @covers Language::getMessagesFileName
+        */
+       public function testGetMessagesFileName_withHook() {
+               $hookCalls = 0;
+
+               $this->setTemporaryHook( 'Language::getMessagesFileName',
+                       function ( $code, &$file ) use ( &$hookCalls ) {
+                               global $IP;
+
+                               ++$hookCalls;
+
+                               // The hook must be given the requested code and the default path...
+                               $this->assertSame( 'ab-cd', $code );
+                               $this->assertSame( "$IP/languages/messages/MessagesAb_cd.php", $file );
+
+                               // ...and whatever it writes back must become the result.
+                               $file = 'bye-bye';
+                       }
+               );
+
+               $actual = $this->getMessagesFileName( 'ab-cd' );
+
+               $this->assertSame( 'bye-bye', $actual );
+               $this->assertSame( 1, $hookCalls );
+       }
+
+       /**
+        * Get the path of the JSON messages file for a language code. Implemented by the class
+        * that uses this trait. The tests in this trait expect an MWException for an invalid code.
+        *
+        * @param string $code Language code
+        * @return string
+        */
+       abstract protected function getJsonMessagesFileName( $code );
+
+       /**
+        * Check the JSON messages file path produced for a language code.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName
+        * @covers Language::getJsonMessagesFileName
+        */
+       public function testGetJsonMessagesFileName() {
+               global $IP;
+
+               // Not so much to test here, one test seems to be enough
+               $actual = $this->getJsonMessagesFileName( 'en--123' );
+               $this->assertSame( "$IP/languages/i18n/en--123.json", $actual );
+       }
+
+       /**
+        * getFileName, getMessagesFileName, and getJsonMessagesFileName all throw if they get an
+        * invalid code. To save boilerplate, test them all in one method.
+        *
+        * @dataProvider provideExceptionFromInvalidCode
+        * @covers MediaWiki\Languages\LanguageNameUtils::getFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName
+        * @covers Language::getFileName
+        * @covers Language::getMessagesFileName
+        * @covers Language::getJsonMessagesFileName
+        *
+        * @param callable $callback Will throw when passed $code
+        * @param string $code
+        */
+       public function testExceptionFromInvalidCode( $callback, $code ) {
+               $expectedMessage = "Invalid language code \"$code\"";
+               $this->setExpectedException( MWException::class, $expectedMessage );
+
+               // The expectation is registered first; this call is the one that has to throw.
+               $callback( $code );
+       }
+
+       /**
+        * Cases for testExceptionFromInvalidCode(), each of the form [ callback, invalid code ].
+        * Every isValidBuiltInCode() case that expects false yields three cases, one per method
+        * under test.
+        *
+        * @return array
+        */
+       public static function provideExceptionFromInvalidCode() {
+               $cases = [];
+               foreach ( static::provideIsValidBuiltInCode() as $desc => $case ) {
+                       $code = $case[0];
+                       $valid = $case[1];
+
+                       // Valid codes won't produce an exception, so only invalid ones yield cases.
+                       if ( !$valid ) {
+                               // For getFileName, we define an anonymous function because of the extra
+                               // first param
+                               $cases["getFileName: $desc"] = [
+                                       function ( $invalidCode ) {
+                                               return static::getFileName( 'Messages', $invalidCode );
+                                       },
+                                       $code
+                               ];
+
+                               // NOTE(review): these callables name non-static methods through the class
+                               // name; presumably this relies on being invoked from an instance method
+                               // of the same class. Confirm.
+                               $cases["getMessagesFileName: $desc"] =
+                                       [ [ static::class, 'getMessagesFileName' ], $code ];
+
+                               $cases["getJsonMessagesFileName: $desc"] =
+                                       [ [ static::class, 'getJsonMessagesFileName' ], $code ];
+                       }
+               }
+               return $cases;
+       }
+}