Merge "Split some Language methods to LanguageNameUtils" into REL1_34
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 9 Oct 2019 09:46:53 +0000 (09:46 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 9 Oct 2019 09:46:53 +0000 (09:46 +0000)
17 files changed:
RELEASE-NOTES-1.34
autoload.php
includes/MediaWikiServices.php
includes/ServiceWiring.php
includes/cache/localisation/LocalisationCache.php
includes/language/LanguageCode.php
includes/language/LanguageNameUtils.php [new file with mode: 0644]
languages/Language.php
languages/data/Names.php
maintenance/rebuildLocalisationCache.php
tests/common/TestsAutoLoader.php
tests/phpunit/MediaWikiUnitTestCase.php
tests/phpunit/includes/api/ApiQuerySiteinfoTest.php
tests/phpunit/includes/cache/LocalisationCacheTest.php
tests/phpunit/languages/LanguageTest.php
tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php [new file with mode: 0644]
tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php [new file with mode: 0644]

index 7a17a99..80f1629 100644 (file)
@@ -610,6 +610,11 @@ because of Phabricator reports.
   remove HHVM support from MediaWiki, which started in MediaWiki 1.31.
 * Language::getLocalisationCache() is deprecated. Use MediaWikiServices
   instead.
+* The following Language methods are deprecated: isSupportedLanguage,
+  isValidCode, isValidBuiltInCode, isKnownLanguageTag, fetchLanguageNames,
+  fetchLanguageName, getFileName, getMessagesFileName, getJsonMessagesFileName.
+  Use the new LanguageNameUtils class instead. (Note that fetchLanguageName(s)
+  are called getLanguageName(s) in the new class.)
 
 === Other changes in 1.34 ===
 * Added option to specify "Various authors" as author in extension credits using
index dc57ff6..b2147a5 100644 (file)
@@ -881,6 +881,7 @@ $wgAutoloadLocalClasses = [
        'MediaWiki\\Languages\\Data\\CrhExceptions' => __DIR__ . '/languages/data/CrhExceptions.php',
        'MediaWiki\\Languages\\Data\\Names' => __DIR__ . '/languages/data/Names.php',
        'MediaWiki\\Languages\\Data\\ZhConversion' => __DIR__ . '/languages/data/ZhConversion.php',
+       'MediaWiki\\Languages\\LanguageNameUtils' => __DIR__ . '/includes/language/LanguageNameUtils.php',
        'MediaWiki\\Logger\\ConsoleLogger' => __DIR__ . '/includes/debug/logger/ConsoleLogger.php',
        'MediaWiki\\Logger\\ConsoleSpi' => __DIR__ . '/includes/debug/logger/ConsoleSpi.php',
        'MediaWiki\\Logger\\LegacyLogger' => __DIR__ . '/includes/debug/logger/LegacyLogger.php',
index a32fbef..3214e6a 100644 (file)
@@ -21,6 +21,7 @@ use MediaWiki\FileBackend\FSFile\TempFSFileFactory;
 use MediaWiki\FileBackend\LockManager\LockManagerGroupFactory;
 use MediaWiki\Http\HttpRequestFactory;
 use PasswordReset;
+use MediaWiki\Languages\LanguageNameUtils;
 use Wikimedia\Message\IMessageFormatterFactory;
 use MediaWiki\Page\MovePageFactory;
 use MediaWiki\Permissions\PermissionManager;
@@ -627,6 +628,14 @@ class MediaWikiServices extends ServiceContainer {
                return $this->getService( 'InterwikiLookup' );
        }
 
+       /**
+        * @since 1.34
+        * @return LanguageNameUtils The service registered under the name 'LanguageNameUtils'
+        */
+       public function getLanguageNameUtils() {
+               return $this->getService( 'LanguageNameUtils' );
+       }
+
        /**
         * @since 1.28
         * @return LinkCache
index e0db05b..1ee17b2 100644 (file)
@@ -56,6 +56,7 @@ use MediaWiki\FileBackend\LockManager\LockManagerGroupFactory;
 use MediaWiki\Http\HttpRequestFactory;
 use MediaWiki\Interwiki\ClassicInterwikiLookup;
 use MediaWiki\Interwiki\InterwikiLookup;
+use MediaWiki\Languages\LanguageNameUtils;
 use MediaWiki\Linker\LinkRenderer;
 use MediaWiki\Linker\LinkRendererFactory;
 use MediaWiki\Logger\LoggerFactory;
@@ -259,6 +260,13 @@ return [
                );
        },
 
+       'LanguageNameUtils' => function ( MediaWikiServices $services ) : LanguageNameUtils {
+               return new LanguageNameUtils( new ServiceOptions(
+                       LanguageNameUtils::$constructorOptions,
+                       $services->getMainConfig()
+               ) );
+       },
+
        'LinkCache' => function ( MediaWikiServices $services ) : LinkCache {
                return new LinkCache(
                        $services->getTitleFormatter(),
@@ -313,7 +321,8 @@ return [
                        $logger,
                        [ function () use ( $services ) {
                                $services->getResourceLoader()->getMessageBlobStore()->clear();
-                       } ]
+                       } ],
+                       $services->getLanguageNameUtils()
                );
        },
 
index a9e6969..49b2a47 100644 (file)
@@ -23,6 +23,7 @@
 use CLDRPluralRuleParser\Evaluator;
 use CLDRPluralRuleParser\Error as CLDRPluralRuleError;
 use MediaWiki\Config\ServiceOptions;
+use MediaWiki\Languages\LanguageNameUtils;
 use Psr\Log\LoggerInterface;
 
 /**
@@ -73,6 +74,9 @@ class LocalisationCache {
        /** @var callable[] See comment for parameter in constructor */
        private $clearStoreCallbacks;
 
+       /** @var LanguageNameUtils */
+       private $langNameUtils;
+
        /**
         * A 2-d associative array, code/key, where presence indicates that the item
         * is loaded. Value arbitrary.
@@ -244,13 +248,15 @@ class LocalisationCache {
         * @param callable[] $clearStoreCallbacks To be called whenever the cache is cleared. Can be
         *   used to clear other caches that depend on this one, such as ResourceLoader's
         *   MessageBlobStore.
+        * @param LanguageNameUtils $langNameUtils
         * @throws MWException
         */
        function __construct(
                ServiceOptions $options,
                LCStore $store,
                LoggerInterface $logger,
-               array $clearStoreCallbacks = []
+               array $clearStoreCallbacks,
+               LanguageNameUtils $langNameUtils
        ) {
                $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
 
@@ -258,6 +264,7 @@ class LocalisationCache {
                $this->store = $store;
                $this->logger = $logger;
                $this->clearStoreCallbacks = $clearStoreCallbacks;
+               $this->langNameUtils = $langNameUtils;
 
                // Keep this separate from $this->options so it can be mutable
                $this->manualRecache = $options->get( 'manualRecache' );
@@ -470,7 +477,7 @@ class LocalisationCache {
                $this->initialisedLangs[$code] = true;
 
                # If the code is of the wrong form for a Messages*.php file, do a shallow fallback
-               if ( !Language::isValidBuiltInCode( $code ) ) {
+               if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) {
                        $this->initShallowFallback( $code, 'en' );
 
                        return;
@@ -478,7 +485,7 @@ class LocalisationCache {
 
                # Recache the data if necessary
                if ( !$this->manualRecache && $this->isExpired( $code ) ) {
-                       if ( Language::isSupportedLanguage( $code ) ) {
+                       if ( $this->langNameUtils->isSupportedLanguage( $code ) ) {
                                $this->recache( $code );
                        } elseif ( $code === 'en' ) {
                                throw new MWException( 'MessagesEn.php is missing.' );
@@ -707,7 +714,7 @@ class LocalisationCache {
                global $IP;
 
                // This reads in the PHP i18n file with non-messages l10n data
-               $fileName = Language::getMessagesFileName( $code );
+               $fileName = $this->langNameUtils->getMessagesFileName( $code );
                if ( !file_exists( $fileName ) ) {
                        $data = [];
                } else {
index 7d954d3..1d2f0b4 100644 (file)
@@ -21,7 +21,6 @@
 
 /**
  * Methods for dealing with language codes.
- * @todo Move some of the code-related static methods out of Language into this class
  *
  * @since 1.29
  * @ingroup Language
diff --git a/includes/language/LanguageNameUtils.php b/includes/language/LanguageNameUtils.php
new file mode 100644 (file)
index 0000000..08d9ab3
--- /dev/null
@@ -0,0 +1,319 @@
+<?php
+/**
+ * Internationalisation code.
+ * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * @defgroup Language Language
+ */
+
+namespace MediaWiki\Languages;
+
+use HashBagOStuff;
+use Hooks;
+use MediaWiki\Config\ServiceOptions;
+use MediaWikiTitleCodec;
+use MWException;
+use Wikimedia\Assert\Assert;
+
+/**
+ * @ingroup Language
+ *
+ * A service that provides utilities to do with language names and codes.
+ *
+ * @since 1.34
+ */
+class LanguageNameUtils {
+       /**
+        * Return autonyms in getLanguageName(s).
+        */
+       const AUTONYMS = null;
+
+       /**
+        * Return all known languages in getLanguageName(s).
+        */
+       const ALL = 'all';
+
+       /**
+        * Return in getLanguageName(s) only the languages that are defined by MediaWiki.
+        */
+       const DEFINED = 'mw';
+
+       /**
+        * Return in getLanguageName(s) only the languages for which we have at least some localisation.
+        */
+       const SUPPORTED = 'mwfile';
+
+       /** @var ServiceOptions */
+       private $options;
+
+       /**
+        * Cache for language names
+        * @var HashBagOStuff|null
+        */
+       private $languageNameCache;
+
+       /**
+        * Cache for validity of language codes
+        * @var array
+        */
+       private $validCodeCache = [];
+
+       public static $constructorOptions = [
+               'ExtraLanguageNames',
+               'UsePigLatinVariant',
+       ];
+
+       /**
+        * @param ServiceOptions $options Checked against self::$constructorOptions
+        */
+       public function __construct( ServiceOptions $options ) {
+               $options->assertRequiredOptions( self::$constructorOptions );
+               $this->options = $options;
+       }
+
+       /**
+        * Tells whether any localisation is available in MediaWiki for the given language tag,
+        * i.e. whether a readable MessagesXx.php or xx.json file exists for it.
+        *
+        * @param string $code Language tag (in lower case)
+        * @return bool Whether language is supported
+        */
+       public function isSupportedLanguage( $code ) {
+               // 'qqq' is a special code for internal use: it is never reported as supported,
+               // even though there is a qqq.json.
+               if ( !$this->isValidBuiltInCode( $code ) || $code === 'qqq' ) {
+                       return false;
+               }
+
+               // Try the PHP messages file first, then fall back to the JSON one
+               if ( is_readable( $this->getMessagesFileName( $code ) ) ) {
+                       return true;
+               }
+               return is_readable( $this->getJsonMessagesFileName( $code ) );
+       }
+
+       /**
+        * Tells whether a string has the form of a valid language code, whether or not such a
+        * language exists. Codes used solely for customisation via the MediaWiki namespace qualify.
+        *
+        * @param string $code
+        * @return bool
+        */
+       public function isValidCode( $code ) {
+               Assert::parameterType( 'string', $code, '$code' );
+               if ( isset( $this->validCodeCache[$code] ) ) {
+                       return $this->validCodeCache[$code];
+               }
+
+               // People think language codes are HTML-safe, so enforce it.  Ideally only a-zA-Z0-9-
+               // would be allowed, but .+ and other chars are often used for {{int:}} hacks.  See bugs
+               // T39564, T39587, T38938.  The character list also protects against path traversal.
+               $isHtmlAndPathSafe = strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code );
+               $isValid = $isHtmlAndPathSafe &&
+                       !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );
+               return $this->validCodeCache[$code] = $isValid;
+       }
+
+       /**
+        * Tells whether a language code has a form usable for internal customisation of MediaWiki
+        * via Messages*.php or *.json, i.e. whether it matches /^[a-z0-9-]{2,}$/.
+        *
+        * @param string $code
+        * @return bool
+        */
+       public function isValidBuiltInCode( $code ) {
+               Assert::parameterType( 'string', $code, '$code' );
+               $matchCount = preg_match( '/^[a-z0-9-]{2,}$/', $code );
+               return $matchCount === 1;
+       }
+
+       /**
+        * Tells whether a language code is an IETF tag known to MediaWiki.
+        *
+        * A tag is known when it has the built-in code form and either appears in
+        * Data\Names::$names or has a non-empty name when looked up in its own language.
+        *
+        * @param string $tag
+        * @return bool
+        */
+       public function isKnownLanguageTag( $tag ) {
+               // Reject malformed input up front, to avoid exceptions down the line when code tries
+               // to process tags which are not valid at all.
+               $hasBuiltInForm = $this->isValidBuiltInCode( $tag );
+               if ( !$hasBuiltInForm ) {
+                       return false;
+               }
+
+               return isset( Data\Names::$names[$tag] ) ||
+                       $this->getLanguageName( $tag, $tag ) !== '';
+       }
+
+       /**
+        * Get language names indexed by language code, using a small per-instance cache.
+        *
+        * @param null|string $inLanguage Language code for the names; self::AUTONYMS for autonyms
+        * @param string $include One of:
+        *   self::ALL all available languages
+        *   self::DEFINED only languages defined in MediaWiki or wgExtraLanguageNames (default)
+        *   self::SUPPORTED only languages in self::DEFINED which *also* have a message file
+        * @return array Language code => language name (sorted by key)
+        */
+       public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) {
+               if ( !$this->languageNameCache ) {
+                       $this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
+               }
+               // Autonym requests are cached under the literal prefix 'null'
+               $langPart = $inLanguage === self::AUTONYMS ? 'null' : $inLanguage;
+               $cacheKey = $langPart . ":$include";
+               $names = $this->languageNameCache->get( $cacheKey );
+               if ( $names ) {
+                       return $names;
+               }
+               $names = $this->getLanguageNamesUncached( $inLanguage, $include );
+               $this->languageNameCache->set( $cacheKey, $names );
+               return $names;
+       }
+
+       /**
+        * Build the language name list for getLanguageNames() without consulting the cache.
+        *
+        * @param null|string $inLanguage As getLanguageNames
+        * @param string $include As getLanguageNames
+        * @return array Language code => language name (sorted by key)
+        */
+       private function getLanguageNamesUncached( $inLanguage, $include ) {
+               $wantAutonyms = $inLanguage === self::AUTONYMS;
+               // If passed an invalid language code to use, fallback to en
+               if ( !$wantAutonyms && !$this->isValidCode( $inLanguage ) ) {
+                       $inLanguage = 'en';
+               }
+
+               $names = [];
+               if ( !$wantAutonyms ) {
+                       # TODO: also include for self::AUTONYMS, when this code is more efficient
+                       Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
+               }
+
+               // Names known to MediaWiki itself; configured extra names win over the built-in ones
+               $mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names;
+               if ( $this->options->get( 'UsePigLatinVariant' ) ) {
+                       // Pig Latin (for variant development)
+                       $mwNames['en-x-piglatin'] = 'Igpay Atinlay';
+               }
+
+               foreach ( $mwNames as $mwCode => $mwName ) {
+                       # Keep a name already added through the hook, except for the requested language
+                       # itself: there the own MediaWiki native name is preferred.
+                       if ( isset( $names[$mwCode] ) && $mwCode !== $inLanguage ) {
+                               continue;
+                       }
+                       $names[$mwCode] = $mwName;
+               }
+
+               if ( $include === self::ALL ) {
+                       ksort( $names );
+                       return $names;
+               }
+
+               $definedNames = [];
+               foreach ( array_keys( $mwNames ) as $definedCode ) {
+                       $definedNames[$definedCode] = $names[$definedCode];
+               }
+
+               # self::DEFINED; also the default for any value other than self::ALL/self::SUPPORTED
+               if ( $include !== self::SUPPORTED ) {
+                       ksort( $definedNames );
+                       return $definedNames;
+               }
+
+               $supportedNames = [];
+               # We do this using a foreach over the codes instead of a directory loop so that messages
+               # files in extensions will work correctly.
+               foreach ( $definedNames as $code => $name ) {
+                       if ( is_readable( $this->getMessagesFileName( $code ) ) ||
+                               is_readable( $this->getJsonMessagesFileName( $code ) )
+                       ) {
+                               $supportedNames[$code] = $name;
+                       }
+               }
+               ksort( $supportedNames );
+               return $supportedNames;
+       }
+
+       /**
+        * Look up the name of a single language; the code is lower-cased before the lookup.
+        *
+        * @param string $code The code of the language for which to get the name
+        * @param null|string $inLanguage Code of language for the name (self::AUTONYMS for autonyms)
+        * @param string $include See getLanguageNames(); note the default here is self::ALL
+        * @return string Language name, or '' if the code is not in the list
+        * @since 1.20
+        */
+       public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
+               $lookupKey = strtolower( $code );
+               $namesByCode = $this->getLanguageNames( $inLanguage, $include );
+               return $namesByCode[$lookupKey] ?? '';
+       }
+
+       /**
+        * Get the name of a file for a language code (ucfirst() applied, '-' replaced by '_').
+        * @param string $prefix Prepend this to the filename
+        * @param string $code Language code
+        * @param string $suffix Append this to the filename
+        * @throws MWException If $code fails isValidBuiltInCode()
+        * @return string $prefix . $mangledCode . $suffix
+        */
+       public function getFileName( $prefix, $code, $suffix = '.php' ) {
+               if ( $this->isValidBuiltInCode( $code ) ) {
+                       $mangledCode = str_replace( '-', '_', ucfirst( $code ) );
+                       return $prefix . $mangledCode . $suffix;
+               }
+               throw new MWException( "Invalid language code \"$code\"" );
+       }
+
+       /**
+        * @param string $code Language code; getFileName() throws MWException if it is invalid
+        * @return string Messages file path, after the Language::getMessagesFileName hook has run
+        */
+       public function getMessagesFileName( $code ) {
+               global $IP;
+               $path = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' );
+               Hooks::run( 'Language::getMessagesFileName', [ $code, &$path ] );
+               return $path;
+       }
+
+       /**
+        * @param string $code
+        * @return string Path of the JSON messages file under $IP/languages/i18n
+        * @throws MWException If $code fails isValidBuiltInCode()
+        */
+       public function getJsonMessagesFileName( $code ) {
+               global $IP;
+
+               // Only build the path for well-formed codes; anything else is rejected
+               if ( $this->isValidBuiltInCode( $code ) ) {
+                       return "$IP/languages/i18n/$code.json";
+               }
+               throw new MWException( "Invalid language code \"$code\"" );
+       }
+}
index a8950f5..39b5f73 100644 (file)
@@ -27,8 +27,8 @@
  */
 
 use CLDRPluralRuleParser\Evaluator;
+use MediaWiki\Languages\LanguageNameUtils;
 use MediaWiki\MediaWikiServices;
-use Wikimedia\Assert\Assert;
 
 /**
  * Internationalisation code
@@ -38,21 +38,24 @@ class Language {
        /**
         * Return autonyms in fetchLanguageName(s).
         * @since 1.32
+        * @deprecated since 1.34, use LanguageNameUtils::AUTONYMS
         */
-       const AS_AUTONYMS = null;
+       const AS_AUTONYMS = LanguageNameUtils::AUTONYMS;
 
        /**
         * Return all known languages in fetchLanguageName(s).
         * @since 1.32
+        * @deprecated since 1.34, use LanguageNameUtils::ALL
         */
-       const ALL = 'all';
+       const ALL = LanguageNameUtils::ALL;
 
        /**
         * Return in fetchLanguageName(s) only the languages for which we have at
         * least some localisation.
         * @since 1.32
+        * @deprecated since 1.34, use LanguageNameUtils::SUPPORTED
         */
-       const SUPPORTED = 'mwfile';
+       const SUPPORTED = LanguageNameUtils::SUPPORTED;
 
        /**
         * @var LanguageConverter|FakeConverter
@@ -80,6 +83,9 @@ class Language {
        /** @var LocalisationCache */
        private $localisationCache;
 
+       /** @var LanguageNameUtils */
+       private $langNameUtils;
+
        public static $mLangObjCache = [];
 
        /**
@@ -94,6 +100,7 @@ class Language {
         */
        const STRICT_FALLBACKS = 1;
 
+       // TODO Make these const once we drop HHVM support (T192166)
        public static $mWeekdayMsgs = [
                'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
                'friday', 'saturday'
@@ -178,12 +185,6 @@ class Language {
         */
        private static $grammarTransformations;
 
-       /**
-        * Cache for language names
-        * @var HashBagOStuff|null
-        */
-       private static $languageNameCache;
-
        /**
         * Unicode directional formatting characters, for embedBidi()
         */
@@ -239,11 +240,12 @@ class Language {
         * @return Language
         */
        protected static function newFromCode( $code, $fallback = false ) {
-               if ( !self::isValidCode( $code ) ) {
+               $langNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils();
+               if ( !$langNameUtils->isValidCode( $code ) ) {
                        throw new MWException( "Invalid language code \"$code\"" );
                }
 
-               if ( !self::isValidBuiltInCode( $code ) ) {
+               if ( !$langNameUtils->isValidBuiltInCode( $code ) ) {
                        // It's not possible to customise this code with class files, so
                        // just return a Language object. This is to support uselang= hacks.
                        $lang = new Language;
@@ -262,7 +264,7 @@ class Language {
                // Keep trying the fallback list until we find an existing class
                $fallbacks = self::getFallbacksFor( $code );
                foreach ( $fallbacks as $fallbackCode ) {
-                       if ( !self::isValidBuiltInCode( $fallbackCode ) ) {
+                       if ( !$langNameUtils->isValidBuiltInCode( $fallbackCode ) ) {
                                throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
                        }
 
@@ -288,32 +290,25 @@ class Language {
                }
                if ( defined( 'MW_PHPUNIT_TEST' ) ) {
                        MediaWikiServices::getInstance()->resetServiceForTesting( 'LocalisationCache' );
+                       MediaWikiServices::getInstance()->resetServiceForTesting( 'LanguageNameUtils' );
                }
                self::$mLangObjCache = [];
                self::$fallbackLanguageCache = [];
                self::$grammarTransformations = null;
-               self::$languageNameCache = null;
        }
 
        /**
         * Checks whether any localisation is available for that language tag
         * in MediaWiki (MessagesXx.php exists).
         *
+        * @deprecated since 1.34, use LanguageNameUtils
         * @param string $code Language tag (in lower case)
         * @return bool Whether language is supported
         * @since 1.21
         */
        public static function isSupportedLanguage( $code ) {
-               if ( !self::isValidBuiltInCode( $code ) ) {
-                       return false;
-               }
-
-               if ( $code === 'qqq' ) {
-                       return false;
-               }
-
-               return is_readable( self::getMessagesFileName( $code ) ) ||
-                       is_readable( self::getJsonMessagesFileName( $code ) );
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->isSupportedLanguage( $code );
        }
 
        /**
@@ -381,63 +376,45 @@ class Language {
         * not it exists. This includes codes which are used solely for
         * customisation via the MediaWiki namespace.
         *
+        * @deprecated since 1.34, use LanguageNameUtils
+        *
         * @param string $code
         *
         * @return bool
         */
        public static function isValidCode( $code ) {
-               static $cache = [];
-               Assert::parameterType( 'string', $code, '$code' );
-               if ( !isset( $cache[$code] ) ) {
-                       // People think language codes are html safe, so enforce it.
-                       // Ideally we should only allow a-zA-Z0-9-
-                       // but, .+ and other chars are often used for {{int:}} hacks
-                       // see bugs T39564, T39587, T38938
-                       $cache[$code] =
-                               // Protect against path traversal
-                               strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code )
-                               && !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );
-               }
-               return $cache[$code];
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()->isValidCode( $code );
        }
 
        /**
         * Returns true if a language code is of a valid form for the purposes of
         * internal customisation of MediaWiki, via Messages*.php or *.json.
         *
+        * @deprecated since 1.34, use LanguageNameUtils
+        *
         * @param string $code
         *
         * @since 1.18
         * @return bool
         */
        public static function isValidBuiltInCode( $code ) {
-               Assert::parameterType( 'string', $code, '$code' );
-
-               return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code );
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->isValidBuiltInCode( $code );
        }
 
        /**
         * Returns true if a language code is an IETF tag known to MediaWiki.
         *
+        * @deprecated since 1.34, use LanguageNameUtils
+        *
         * @param string $tag
         *
         * @since 1.21
         * @return bool
         */
        public static function isKnownLanguageTag( $tag ) {
-               // Quick escape for invalid input to avoid exceptions down the line
-               // when code tries to process tags which are not valid at all.
-               if ( !self::isValidBuiltInCode( $tag ) ) {
-                       return false;
-               }
-
-               if ( isset( MediaWiki\Languages\Data\Names::$names[$tag] )
-                       || self::fetchLanguageName( $tag, $tag ) !== ''
-               ) {
-                       return true;
-               }
-
-               return false;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->isKnownLanguageTag( $tag );
        }
 
        /**
@@ -458,7 +435,9 @@ class Language {
                } else {
                        $this->mCode = str_replace( '_', '-', strtolower( substr( static::class, 8 ) ) );
                }
-               $this->localisationCache = MediaWikiServices::getInstance()->getLocalisationCache();
+               $services = MediaWikiServices::getInstance();
+               $this->localisationCache = $services->getLocalisationCache();
+               $this->langNameUtils = $services->getLanguageNameUtils();
        }
 
        /**
@@ -764,7 +743,7 @@ class Language {
                if ( $usemsg && wfMessage( $msg )->exists() ) {
                        return $this->getMessageFromDB( $msg );
                }
-               $name = self::fetchLanguageName( $code );
+               $name = $this->langNameUtils->getLanguageName( $code );
                if ( $name ) {
                        return $name; # if it's defined as a language name, show that
                } else {
@@ -825,6 +804,8 @@ class Language {
 
        /**
         * Get an array of language names, indexed by code.
+        *
+        * @deprecated since 1.34, use LanguageNameUtils::getLanguageNames
         * @param null|string $inLanguage Code of language in which to return the names
         *              Use self::AS_AUTONYMS for autonyms (native names)
         * @param string $include One of:
@@ -835,95 +816,12 @@ class Language {
         * @since 1.20
         */
        public static function fetchLanguageNames( $inLanguage = self::AS_AUTONYMS, $include = 'mw' ) {
-               $cacheKey = $inLanguage === self::AS_AUTONYMS ? 'null' : $inLanguage;
-               $cacheKey .= ":$include";
-               if ( self::$languageNameCache === null ) {
-                       self::$languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
-               }
-
-               $ret = self::$languageNameCache->get( $cacheKey );
-               if ( !$ret ) {
-                       $ret = self::fetchLanguageNamesUncached( $inLanguage, $include );
-                       self::$languageNameCache->set( $cacheKey, $ret );
-               }
-               return $ret;
-       }
-
-       /**
-        * Uncached helper for fetchLanguageNames
-        * @param null|string $inLanguage Code of language in which to return the names
-        *              Use self::AS_AUTONYMS for autonyms (native names)
-        * @param string $include One of:
-        *              self::ALL all available languages
-        *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
-        *              self::SUPPORTED only if the language is in 'mw' *and* has a message file
-        * @return array Language code => language name (sorted by key)
-        */
-       private static function fetchLanguageNamesUncached(
-               $inLanguage = self::AS_AUTONYMS,
-               $include = 'mw'
-       ) {
-               global $wgExtraLanguageNames, $wgUsePigLatinVariant;
-
-               // If passed an invalid language code to use, fallback to en
-               if ( $inLanguage !== self::AS_AUTONYMS && !self::isValidCode( $inLanguage ) ) {
-                       $inLanguage = 'en';
-               }
-
-               $names = [];
-
-               if ( $inLanguage ) {
-                       # TODO: also include when $inLanguage is null, when this code is more efficient
-                       Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
-               }
-
-               $mwNames = $wgExtraLanguageNames + MediaWiki\Languages\Data\Names::$names;
-               if ( $wgUsePigLatinVariant ) {
-                       // Pig Latin (for variant development)
-                       $mwNames['en-x-piglatin'] = 'Igpay Atinlay';
-               }
-
-               foreach ( $mwNames as $mwCode => $mwName ) {
-                       # - Prefer own MediaWiki native name when not using the hook
-                       # - For other names just add if not added through the hook
-                       if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
-                               $names[$mwCode] = $mwName;
-                       }
-               }
-
-               if ( $include === self::ALL ) {
-                       ksort( $names );
-                       return $names;
-               }
-
-               $returnMw = [];
-               $coreCodes = array_keys( $mwNames );
-               foreach ( $coreCodes as $coreCode ) {
-                       $returnMw[$coreCode] = $names[$coreCode];
-               }
-
-               if ( $include === self::SUPPORTED ) {
-                       $namesMwFile = [];
-                       # We do this using a foreach over the codes instead of a directory
-                       # loop so that messages files in extensions will work correctly.
-                       foreach ( $returnMw as $code => $value ) {
-                               if ( is_readable( self::getMessagesFileName( $code ) )
-                                       || is_readable( self::getJsonMessagesFileName( $code ) )
-                               ) {
-                                       $namesMwFile[$code] = $names[$code];
-                               }
-                       }
-
-                       ksort( $namesMwFile );
-                       return $namesMwFile;
-               }
-
-               ksort( $returnMw );
-               # 'mw' option; default if it's not one of the other two options (all/mwfile)
-               return $returnMw;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getLanguageNames( $inLanguage, $include );
        }
 
        /**
+        * @deprecated since 1.34, use LanguageNameUtils::getLanguageName
         * @param string $code The code of the language for which to get the name
         * @param null|string $inLanguage Code of language in which to return the name
         *   (SELF::AS_AUTONYMS for autonyms)
@@ -936,9 +834,8 @@ class Language {
                $inLanguage = self::AS_AUTONYMS,
                $include = self::ALL
        ) {
-               $code = strtolower( $code );
-               $array = self::fetchLanguageNames( $inLanguage, $include );
-               return !array_key_exists( $code, $array ) ? '' : $array[$code];
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getLanguageName( $code, $inLanguage, $include );
        }
 
        /**
@@ -4444,6 +4341,8 @@ class Language {
 
        /**
         * Get the name of a file for a certain language code
+        *
+        * @deprecated since 1.34, use LanguageNameUtils
         * @param string $prefix Prepend this to the filename
         * @param string $code Language code
         * @param string $suffix Append this to the filename
@@ -4451,38 +4350,30 @@ class Language {
         * @return string $prefix . $mangledCode . $suffix
         */
        public static function getFileName( $prefix, $code, $suffix = '.php' ) {
-               if ( !self::isValidBuiltInCode( $code ) ) {
-                       throw new MWException( "Invalid language code \"$code\"" );
-               }
-
-               return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getFileName( $prefix, $code, $suffix );
        }
 
        /**
+        * @deprecated since 1.34, use LanguageNameUtils
         * @param string $code
         * @return string
         */
        public static function getMessagesFileName( $code ) {
-               global $IP;
-               $file = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
-               Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] );
-               return $file;
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getMessagesFileName( $code );
        }
 
        /**
+        * @deprecated since 1.34, use LanguageNameUtils
         * @param string $code
         * @return string
         * @throws MWException
         * @since 1.23
         */
        public static function getJsonMessagesFileName( $code ) {
-               global $IP;
-
-               if ( !self::isValidBuiltInCode( $code ) ) {
-                       throw new MWException( "Invalid language code \"$code\"" );
-               }
-
-               return "$IP/languages/i18n/$code.json";
+               return MediaWikiServices::getInstance()->getLanguageNameUtils()
+                       ->getJsonMessagesFileName( $code );
        }
 
        /**
index 783a211..2ae38d5 100644 (file)
@@ -39,7 +39,7 @@ namespace MediaWiki\Languages\Data;
  * If you are adding support for such a language, add it also to
  * the relevant section in shared.css.
  *
- * Do not use this class directly. Use Language::fetchLanguageNames(), which
+ * Do not use this class directly. Use LanguageNameUtils::getLanguageNames(), which
  * includes support for the CLDR extension.
  *
  * @ingroup Language
index 07c5569..e5f799d 100644 (file)
@@ -97,7 +97,8 @@ class RebuildLocalisationCache extends Maintenance {
                        [ function () {
                                MediaWikiServices::getInstance()->getResourceLoader()
                                        ->getMessageBlobStore()->clear();
-                       } ]
+                       } ],
+                       MediaWikiServices::getInstance()->getLanguageNameUtils()
                );
 
                $allCodes = array_keys( Language::fetchLanguageNames( null, 'mwfile' ) );
index 1657e81..5968aed 100644 (file)
@@ -225,6 +225,7 @@ $wgAutoloadClasses += [
 
        # tests/phpunit/unit/includes/language
        'LanguageFallbackTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageFallbackTestTrait.php",
+       'LanguageNameUtilsTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php",
 
        # tests/phpunit/unit/includes/libs/filebackend/fsfile
        'TempFSFileTestTrait' => "$testDir/phpunit/unit/includes/libs/filebackend/fsfile/TempFSFileTestTrait.php",
index fda986c..c63056d 100644 (file)
@@ -53,7 +53,8 @@ abstract class MediaWikiUnitTestCase extends TestCase {
                        'wgAutoloadLocalClasses',
                        // Need for LoggerFactory. Default is NullSpi.
                        'wgMWLoggerDefaultSpi',
-                       'wgAutoloadAttemptLowercase'
+                       'wgAutoloadAttemptLowercase',
+                       'wgLegalTitleChars'
                ];
        }
 
index 7f5ee0c..d1e8e84 100644 (file)
@@ -191,6 +191,7 @@ class ApiQuerySiteinfoTest extends ApiTestCase {
                        'wgExtraInterlanguageLinkPrefixes' => [ 'self' ],
                        'wgExtraLanguageNames' => [ 'self' => 'Recursion' ],
                ] );
+               $this->resetServices();
 
                MessageCache::singleton()->enable();
 
index af1ff86..4dd819a 100644 (file)
@@ -1,6 +1,7 @@
 <?php
 
 use MediaWiki\Config\ServiceOptions;
+use MediaWiki\Languages\LanguageNameUtils;
 use Psr\Log\NullLogger;
 
 /**
@@ -24,6 +25,37 @@ class LocalisationCacheTest extends MediaWikiTestCase {
        protected function getMockLocalisationCache() {
                global $IP;
 
+               $mockLangNameUtils = $this->createMock( LanguageNameUtils::class );
+               $mockLangNameUtils->method( 'isValidBuiltInCode' )->will( $this->returnCallback(
+                       function ( $code ) {
+                               // Copy-paste, but it's only one line
+                               return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code );
+                       }
+               ) );
+               // Stub isSupportedLanguage() so that only the codes listed in the callback are
+               // reported as supported.
+               $mockLangNameUtils->method( 'isSupportedLanguage' )->will( $this->returnCallback(
+                       function ( $code ) {
+                               // Compare strictly: with a loose comparison a non-string $code (for
+                               // example int 0 on PHP 7) would match one of the codes.
+                               return in_array(
+                                       $code,
+                                       [ 'ar', 'arz', 'ba', 'de', 'en', 'ksh', 'ru' ],
+                                       true
+                               );
+                       }
+               ) );
+               $mockLangNameUtils->method( 'getMessagesFileName' )->will( $this->returnCallback(
+                       function ( $code ) {
+                               global $IP;
+                               $code = str_replace( '-', '_', ucfirst( $code ) );
+                               return "$IP/languages/messages/Messages$code.php";
+                       }
+               ) );
+               $mockLangNameUtils->expects( $this->never() )->method( $this->anythingBut(
+                       'isValidBuiltInCode', 'isSupportedLanguage', 'getMessagesFileName'
+               ) );
+
                $lc = $this->getMockBuilder( LocalisationCache::class )
                        ->setConstructorArgs( [
                                new ServiceOptions( LocalisationCache::CONSTRUCTOR_OPTIONS, [
@@ -33,7 +65,9 @@ class LocalisationCacheTest extends MediaWikiTestCase {
                                        'MessagesDirs' => [],
                                ] ),
                                new LCStoreDB( [] ),
-                               new NullLogger
+                               new NullLogger,
+                               [],
+                               $mockLangNameUtils
                        ] )
                        ->setMethods( [ 'getMessagesDirs' ] )
                        ->getMock();
index c443f20..68dfd37 100644 (file)
@@ -3,6 +3,24 @@
 use Wikimedia\TestingAccessWrapper;
 
 class LanguageTest extends LanguageClassesTestCase {
+       use LanguageNameUtilsTestTrait;
+
+       /** @var array Copy of $wgHooks from before we unset LanguageGetTranslatedLanguageNames */
+       private $origHooks;
+
+       public function setUp() {
+               global $wgHooks;
+
+               parent::setUp();
+
+               // Don't allow installed hooks to run, except if a test restores them via origHooks (needed
+               // for testIsKnownLanguageTag_cldr)
+               $this->origHooks = $wgHooks;
+               $newHooks = $wgHooks;
+               unset( $newHooks['LanguageGetTranslatedLanguageNames'] );
+               $this->setMwGlobals( 'wgHooks', $newHooks );
+       }
+
        /**
         * @covers Language::convertDoubleWidth
         * @covers Language::normalizeForSearch
@@ -510,84 +528,6 @@ class LanguageTest extends LanguageClassesTestCase {
                );
        }
 
-       /**
-        * Test Language::isValidBuiltInCode()
-        * @dataProvider provideLanguageCodes
-        * @covers Language::isValidBuiltInCode
-        */
-       public function testBuiltInCodeValidation( $code, $expected, $message = '' ) {
-               $this->assertEquals( $expected,
-                       (bool)Language::isValidBuiltInCode( $code ),
-                       "validating code $code $message"
-               );
-       }
-
-       public static function provideLanguageCodes() {
-               return [
-                       [ 'fr', true, 'Two letters, minor case' ],
-                       [ 'EN', false, 'Two letters, upper case' ],
-                       [ 'tyv', true, 'Three letters' ],
-                       [ 'be-tarask', true, 'With dash' ],
-                       [ 'be-x-old', true, 'With extension (two dashes)' ],
-                       [ 'be_tarask', false, 'Reject underscores' ],
-               ];
-       }
-
-       /**
-        * Test Language::isKnownLanguageTag()
-        * @dataProvider provideKnownLanguageTags
-        * @covers Language::isKnownLanguageTag
-        */
-       public function testKnownLanguageTag( $code, $message = '' ) {
-               $this->assertTrue(
-                       (bool)Language::isKnownLanguageTag( $code ),
-                       "validating code $code - $message"
-               );
-       }
-
-       public static function provideKnownLanguageTags() {
-               return [
-                       [ 'fr', 'simple code' ],
-                       [ 'bat-smg', 'an MW legacy tag' ],
-                       [ 'sgs', 'an internal standard MW name, for which a legacy tag is used externally' ],
-               ];
-       }
-
-       /**
-        * @covers Language::isKnownLanguageTag
-        */
-       public function testKnownCldrLanguageTag() {
-               if ( !class_exists( 'LanguageNames' ) ) {
-                       $this->markTestSkipped( 'The LanguageNames class is not available. '
-                               . 'The CLDR extension is probably not installed.' );
-               }
-
-               $this->assertTrue(
-                       (bool)Language::isKnownLanguageTag( 'pal' ),
-                       'validating code "pal" an ancient language, which probably will '
-                               . 'not appear in Names.php, but appears in CLDR in English'
-               );
-       }
-
-       /**
-        * Negative tests for Language::isKnownLanguageTag()
-        * @dataProvider provideUnKnownLanguageTags
-        * @covers Language::isKnownLanguageTag
-        */
-       public function testUnknownLanguageTag( $code, $message = '' ) {
-               $this->assertFalse(
-                       (bool)Language::isKnownLanguageTag( $code ),
-                       "checking that code $code is invalid - $message"
-               );
-       }
-
-       public static function provideUnknownLanguageTags() {
-               return [
-                       [ 'mw', 'non-existent two-letter code' ],
-                       [ 'foo"<bar', 'very invalid language code' ],
-               ];
-       }
-
        /**
         * Test too short timestamp
         * @expectedException MWException
@@ -1824,33 +1764,11 @@ class LanguageTest extends LanguageClassesTestCase {
                $lang->getGrammarTransformations();
                $this->assertNotNull( $languageClass->grammarTransformations );
 
-               // Populate $languageNameCache
-               Language::fetchLanguageNames();
-               $this->assertNotNull( $languageClass->languageNameCache );
-
                Language::clearCaches();
 
                $this->assertCount( 0, Language::$mLangObjCache );
                $this->assertCount( 0, $languageClass->fallbackLanguageCache );
                $this->assertNull( $languageClass->grammarTransformations );
-               $this->assertNull( $languageClass->languageNameCache );
-       }
-
-       /**
-        * @dataProvider provideIsSupportedLanguage
-        * @covers Language::isSupportedLanguage
-        */
-       public function testIsSupportedLanguage( $code, $expected, $comment ) {
-               $this->assertEquals( $expected, Language::isSupportedLanguage( $code ), $comment );
-       }
-
-       public static function provideIsSupportedLanguage() {
-               return [
-                       [ 'en', true, 'is supported language' ],
-                       [ 'fi', true, 'is supported language' ],
-                       [ 'bunny', false, 'is not supported language' ],
-                       [ 'FI', false, 'is not supported language, input should be in lower case' ],
-               ];
        }
 
        /**
@@ -1956,4 +1874,82 @@ class LanguageTest extends LanguageClassesTestCase {
                        [ 'èl', 'Ll' , 'Non-ASCII is overridden', [ 'è' => 'L' ] ],
                ];
        }
+
+       // The following methods are for LanguageNameUtilsTestTrait
+
+       private function isSupportedLanguage( $code ) {
+               return Language::isSupportedLanguage( $code );
+       }
+
+       private function isValidCode( $code ) {
+               return Language::isValidCode( $code );
+       }
+
+       private function isValidBuiltInCode( $code ) {
+               return Language::isValidBuiltInCode( $code );
+       }
+
+       private function isKnownLanguageTag( $code ) {
+               return Language::isKnownLanguageTag( $code );
+       }
+
+       /**
+        * Call getLanguageName() and getLanguageNames() using the Language static methods.
+        *
+        * @param array $options To set globals for testing Language
+        * @param string $expected
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) {
+               if ( $options ) {
+                       foreach ( $options as $key => $val ) {
+                               $this->setMwGlobals( "wg$key", $val );
+                       }
+                       $this->resetServices();
+               }
+               $this->assertSame( $expected,
+                       Language::fetchLanguageNames( ...$otherArgs )[strtolower( $code )] ?? '' );
+               $this->assertSame( $expected, Language::fetchLanguageName( $code, ...$otherArgs ) );
+       }
+
+       private function getLanguageNames( ...$args ) {
+               return Language::fetchLanguageNames( ...$args );
+       }
+
+       private function getLanguageName( ...$args ) {
+               return Language::fetchLanguageName( ...$args );
+       }
+
+       private static function getFileName( ...$args ) {
+               return Language::getFileName( ...$args );
+       }
+
+       private static function getMessagesFileName( $code ) {
+               return Language::getMessagesFileName( $code );
+       }
+
+       private static function getJsonMessagesFileName( $code ) {
+               return Language::getJsonMessagesFileName( $code );
+       }
+
+       /**
+        * @todo This really belongs in the cldr extension's tests.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag
+        * @covers Language::isKnownLanguageTag
+        */
+       public function testIsKnownLanguageTag_cldr() {
+               if ( !class_exists( 'LanguageNames' ) ) {
+                       $this->markTestSkipped( 'The LanguageNames class is not available. '
+                               . 'The CLDR extension is probably not installed.' );
+               }
+
+               // We need to restore the extension's hook that we removed.
+               $this->setMwGlobals( 'wgHooks', $this->origHooks );
+
+               // "pal" is an ancient language, which probably will not appear in Names.php, but appears in
+               // CLDR in English
+               $this->assertTrue( Language::isKnownLanguageTag( 'pal' ) );
+       }
 }
diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php
new file mode 100644 (file)
index 0000000..6fbd4a2
--- /dev/null
@@ -0,0 +1,86 @@
+<?php
+
+use MediaWiki\Config\ServiceOptions;
+use MediaWiki\Languages\LanguageNameUtils;
+
+/**
+ * Runs the tests shared through LanguageNameUtilsTestTrait against LanguageNameUtils objects
+ * built directly from ServiceOptions.
+ */
+class LanguageNameUtilsTest extends MediaWikiUnitTestCase {
+       use LanguageNameUtilsTestTrait;
+
+       /**
+        * Build a fresh LanguageNameUtils object.
+        *
+        * @param array $optionsArray Option values passed to ServiceOptions ahead of the
+        *   hard-coded ExtraLanguageNames, LanguageCode and UsePigLatinVariant values
+        * @return LanguageNameUtils
+        */
+       private static function newObj( array $optionsArray = [] ) : LanguageNameUtils {
+               return new LanguageNameUtils( new ServiceOptions(
+                       LanguageNameUtils::$constructorOptions,
+                       $optionsArray,
+                       [
+                               'ExtraLanguageNames' => [],
+                               'LanguageCode' => 'en',
+                               'UsePigLatinVariant' => false,
+                       ]
+               ) );
+       }
+
+       // The following methods are for LanguageNameUtilsTestTrait. newObj() is static, so it is
+       // always invoked as self::newObj() rather than through $this.
+
+       private function isSupportedLanguage( $code ) {
+               return self::newObj()->isSupportedLanguage( $code );
+       }
+
+       private function isValidCode( $code ) {
+               return self::newObj()->isValidCode( $code );
+       }
+
+       private function isValidBuiltInCode( $code ) {
+               return self::newObj()->isValidBuiltInCode( $code );
+       }
+
+       private function isKnownLanguageTag( $code ) {
+               return self::newObj()->isKnownLanguageTag( $code );
+       }
+
+       /**
+        * Assert that getLanguageNames() and getLanguageName() both yield $expected for $code.
+        *
+        * @param array $options Passed to newObj() for the objects under test
+        * @param string $expected
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) {
+               // A code missing from the returned array is compared as ''.
+               $this->assertSame( $expected, self::newObj( $options )
+                       ->getLanguageNames( ...$otherArgs )[strtolower( $code )] ?? '' );
+               $this->assertSame( $expected,
+                       self::newObj( $options )->getLanguageName( $code, ...$otherArgs ) );
+       }
+
+       private function getLanguageNames( ...$args ) {
+               return self::newObj()->getLanguageNames( ...$args );
+       }
+
+       private function getLanguageName( ...$args ) {
+               return self::newObj()->getLanguageName( ...$args );
+       }
+
+       private static function getFileName( ...$args ) {
+               return self::newObj()->getFileName( ...$args );
+       }
+
+       private static function getMessagesFileName( $code ) {
+               return self::newObj()->getMessagesFileName( $code );
+       }
+
+       private static function getJsonMessagesFileName( $code ) {
+               return self::newObj()->getJsonMessagesFileName( $code );
+       }
+}
diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php
new file mode 100644 (file)
index 0000000..bd777e9
--- /dev/null
@@ -0,0 +1,555 @@
+<?php
+
+use MediaWiki\Languages\LanguageNameUtils;
+
+const AUTONYMS = LanguageNameUtils::AUTONYMS;
+const ALL = LanguageNameUtils::ALL;
+const DEFINED = LanguageNameUtils::DEFINED;
+const SUPPORTED = LanguageNameUtils::SUPPORTED;
+
+/**
+ * For code shared between LanguageNameUtilsTest and LanguageTest.
+ */
+trait LanguageNameUtilsTestTrait {
+       abstract protected function isSupportedLanguage( $code );
+
+       /**
+        * @dataProvider provideIsSupportedLanguage
+        * @covers MediaWiki\Languages\LanguageNameUtils::__construct
+        * @covers MediaWiki\Languages\LanguageNameUtils::isSupportedLanguage
+        * @covers Language::isSupportedLanguage
+        */
+       public function testIsSupportedLanguage( $code, $expected ) {
+               $this->assertSame( $expected, $this->isSupportedLanguage( $code ) );
+       }
+
+       public static function provideIsSupportedLanguage() {
+               return [
+                       'en' => [ 'en', true ],
+                       'fi' => [ 'fi', true ],
+                       'bunny' => [ 'bunny', false ],
+                       'qqq' => [ 'qqq', false ],
+                       'uppercase is not considered supported' => [ 'FI', false ],
+               ];
+       }
+
+       abstract protected function isValidCode( $code );
+
+       /**
+        * We don't test that the result is cached, because that should only be noticeable if the
+        * configuration changes in between calls, and 1) that should never happen in normal operation,
+        * 2) if you do it you deserve whatever you get, and 3) once the static Language method is
+        * dropped and the invalid title regex is moved to something injected instead of a static call,
+        * the cache will be undetectable.
+        *
+        * @todo Should we test changes to $wgLegalTitleChars here? Does anybody actually change that?
+        * Is it possible to change it usefully without breaking everything?
+        *
+        * @dataProvider provideIsValidCode
+        * @covers MediaWiki\Languages\LanguageNameUtils::isValidCode
+        * @covers Language::isValidCode
+        *
+        * @param string $code
+        * @param bool $expected
+        */
+       public function testIsValidCode( $code, $expected ) {
+               $this->assertSame( $expected, $this->isValidCode( $code ) );
+       }
+
+       public static function provideIsValidCode() {
+               $ret = [
+                       'en' => [ 'en', true ],
+                       'en-GB' => [ 'en-GB', true ],
+                       'Funny chars' => [ "%!$()*,-.;=?@^_`~\x80\xA2\xFF+", true ],
+                       'Percent escape not allowed' => [ 'a%aF', false ],
+                       'Percent with only one following char is okay' => [ '%a', true ],
+                       'Percent with non-hex following chars is okay' => [ '%AG', true ],
+                       'Named char reference "a"' => [ 'a&a', false ],
+                       'Named char reference "A"' => [ 'a&A', false ],
+                       'Named char reference "0"' => [ 'a&0', false ],
+                       'Named char reference non-ASCII' => [ "a&\x92", false ],
+                       'Numeric char reference' => [ "a&#0", false ],
+                       'Hex char reference 0' => [ "a&#x0", false ],
+                       'Hex char reference A' => [ "a&#xA", false ],
+                       'Lone ampersand is valid for title but not lang code' => [ '&', false ],
+                       'Ampersand followed by just # is valid for title but not lang code' => [ '&#', false ],
+                       'Ampersand followed by # and non-x/digit is valid for title but not lang code' =>
+                               [ '&#a', false ],
+               ];
+               $disallowedChars = ":/\\\000&<>'\"";
+               foreach ( str_split( $disallowedChars ) as $char ) {
+                       $ret["Disallowed character $char"] = [ "a{$char}a", false ];
+               }
+               return $ret;
+       }
+
+       abstract protected function isValidBuiltInCode( $code );
+
+       /**
+        * @dataProvider provideIsValidBuiltInCode
+        * @covers MediaWiki\Languages\LanguageNameUtils::isValidBuiltInCode
+        * @covers Language::isValidBuiltInCode
+        *
+        * @param string $code
+        * @param bool $expected
+        */
+       public function testIsValidBuiltInCode( $code, $expected ) {
+               $this->assertSame( $expected, $this->isValidBuiltInCode( $code ) );
+       }
+
+       public static function provideIsValidBuiltInCode() {
+               return [
+                       'Two letters, lowercase' => [ 'fr', true ],
+                       'Two letters, uppercase' => [ 'EN', false ],
+                       'Three letters' => [ 'tyv', true ],
+                       'With dash' => [ 'be-tarask', true ],
+                       'With extension (two dashes)' => [ 'be-x-old', true ],
+                       'Reject underscores' => [ 'be_tarask', false ],
+                       'One letter' => [ 'a', false ],
+                       'Only digits' => [ '00', true ],
+                       'Only dashes' => [ '--', true ],
+                       'Unreasonably long' => [ str_repeat( 'x', 100 ), true ],
+                       'qqq' => [ 'qqq', true ],
+               ];
+       }
+
+       abstract protected function isKnownLanguageTag( $code );
+
+       /**
+        * @dataProvider provideIsKnownLanguageTag
+        * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag
+        * @covers Language::isKnownLanguageTag
+        *
+        * @param string $code
+        * @param bool $expected
+        */
+       public function testIsKnownLanguageTag( $code, $expected ) {
+               $this->assertSame( $expected, $this->isKnownLanguageTag( $code ) );
+       }
+
+       public static function provideIsKnownLanguageTag() {
+               $invalidBuiltInCodes = array_filter( static::provideIsValidBuiltInCode(),
+                       function ( $arr ) {
+                               // If isValidBuiltInCode() returns false, we want to also, but if it returns true,
+                               // we could still return false from isKnownLanguageTag(), so skip those.
+                               return !$arr[1];
+                       }
+               );
+               return array_merge( $invalidBuiltInCodes, [
+                       'Simple code' => [ 'fr', true ],
+                       'An MW legacy tag' => [ 'bat-smg', true ],
+                       'An internal standard MW name, for which a legacy tag is used externally' =>
+                               [ 'sgs', true ],
+                       'Non-existent two-letter code' => [ 'mw', false ],
+                       'Very invalid language code' => [ 'foo"<bar', false ],
+               ] );
+       }
+
+       abstract protected function assertGetLanguageNames(
+               array $options, $expected, $code, ...$otherArgs
+       );
+
+       abstract protected function getLanguageNames( ...$args );
+
+       abstract protected function getLanguageName( ...$args );
+
+       /**
+        * @dataProvider provideGetLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames( $expected, $code, ...$otherArgs ) {
+               // The plain cases run without any option overrides.
+               $noOptions = [];
+               $this->assertGetLanguageNames( $noOptions, $expected, $code, ...$otherArgs );
+       }
+
+       /**
+        * Data provider for testGetLanguageNames().
+        *
+        * @return array[] Each case is [ $expected, $code, ...$otherArgs ], where the optional
+        *   trailing elements are $inLanguage and/or $include
+        */
+       public static function provideGetLanguageNames() {
+               // @todo There are probably lots of interesting tests to add here.
+               return [
+                       'Simple code' => [ 'Deutsch', 'de' ],
+                       'Simple code in a different language (doesn\'t work without hook)' =>
+                               [ 'Deutsch', 'de', 'fr' ],
+                       'Invalid code' => [ '', '&' ],
+                       'Pig Latin not enabled' => [ '', 'en-x-piglatin', AUTONYMS, ALL ],
+                       'qqq doesn\'t have a name' => [ '', 'qqq', AUTONYMS, ALL ],
+                       'An MW legacy tag is recognized' => [ 'žemaitėška', 'bat-smg' ],
+                       // @todo Is the next test's result desired?
+                       'An MW legacy tag is not supported' => [ '', 'bat-smg', AUTONYMS, SUPPORTED ],
+                       'An internal standard name, for which a legacy tag is used externally, is supported' =>
+                               [ 'žemaitėška', 'sgs', AUTONYMS, SUPPORTED ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideGetLanguageNames_withHook
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected Expected return value of getLanguageName()
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames_withHook( $expected, $code, ...$otherArgs ) {
+               // Names the fake hook reports, keyed by the language they were requested in.
+               $translations = [
+                       'de' => [
+                               'de' => 'Deutsch',
+                               'en' => 'Englisch',
+                               'fr' => 'Französisch',
+                       ],
+                       'en' => [
+                               'de' => 'German',
+                               'en' => 'English',
+                               'fr' => 'French',
+                               'sqsqsqsq' => '!!?!',
+                               'bat-smg' => 'Samogitian',
+                       ],
+                       'fr' => [
+                               'de' => 'allemand',
+                               'en' => 'anglais',
+                               // Deliberate mistake (no cedilla)
+                               'fr' => 'francais',
+                       ],
+               ];
+
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names, $inLanguage ) use ( $translations ) {
+                               // Languages without an entry leave $names exactly as it was passed in.
+                               if ( isset( $translations[$inLanguage] ) ) {
+                                       $names = $translations[$inLanguage];
+                               }
+                       }
+               );
+
+               // Really we could dispense with assertGetLanguageNames() and just call
+               // testGetLanguageNames() here, but it looks weird to call a test method from another test
+               // method.
+               $this->assertGetLanguageNames( [], $expected, $code, ...$otherArgs );
+       }
+
+       /**
+        * Data provider for testGetLanguageNames_withHook().
+        *
+        * @return array[] Each case is [ $expected, $code, ...$otherArgs ], where the optional
+        *   trailing elements are $inLanguage and/or $include
+        */
+       public static function provideGetLanguageNames_withHook() {
+               return [
+                       'Simple code in a different language' => [ 'allemand', 'de', 'fr' ],
+                       'Invalid inLanguage defaults to English' => [ 'German', 'de', '&' ],
+                       'If inLanguage not provided, default to autonym' => [ 'Deutsch', 'de' ],
+                       'Hooks ignored for explicitly-requested autonym' => [ 'français', 'fr', 'fr' ],
+                       'Hooks don\'t make a language supported' => [ '', 'bat-smg', 'en', SUPPORTED ],
+                       'Hooks don\'t make a language defined' => [ '', 'sqsqsqsq', 'en', DEFINED ],
+                       'Hooks do make a language name returned with ALL' => [ '!!?!', 'sqsqsqsq', 'en', ALL ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideGetLanguageNames_ExtraLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected Expected return value of getLanguageName()
+        * @param string $code
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames_ExtraLanguageNames( $expected, $code, ...$otherArgs ) {
+               // The hook and the ExtraLanguageNames option both supply a name for 'de', so the cases
+               // can tell which of the two sources was used.
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names ) {
+                               $names['de'] = 'die deutsche Sprache';
+                       }
+               );
+
+               $extraNames = [ 'de' => 'deutsche Sprache', 'sqsqsqsq' => '!!?!' ];
+               $options = [ 'ExtraLanguageNames' => $extraNames ];
+
+               $this->assertGetLanguageNames( $options, $expected, $code, ...$otherArgs );
+       }
+
+       /**
+        * Data provider for testGetLanguageNames_ExtraLanguageNames().
+        *
+        * @return array[] Each case is [ $expected, $code, ...$otherArgs ], where the optional
+        *   trailing elements are $inLanguage and/or $include
+        */
+       public static function provideGetLanguageNames_ExtraLanguageNames() {
+               return [
+                       'Simple extra language name' => [ '!!?!', 'sqsqsqsq' ],
+                       'Extra language is defined' => [ '!!?!', 'sqsqsqsq', AUTONYMS, DEFINED ],
+                       'Extra language is not supported' => [ '', 'sqsqsqsq', AUTONYMS, SUPPORTED ],
+                       'Extra language overrides default' => [ 'deutsche Sprache', 'de' ],
+                       'Extra language overrides hook for explicitly requested autonym' =>
+                               [ 'deutsche Sprache', 'de', 'de' ],
+                       'Hook overrides extra language for non-autonym' =>
+                               [ 'die deutsche Sprache', 'de', 'fr' ],
+               ];
+       }
+
+       /**
+        * Test that getLanguageNames() defaults to DEFINED, and getLanguageName() defaults to ALL.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        */
+       public function testGetLanguageNames_parameterDefault() {
+               // The hook reports a language that MediaWiki itself does not define.
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names ) {
+                               $names = [ 'sqsqsqsq' => '!!?!' ];
+                       }
+               );
+
+               // We use 'en' here because the hook is not run if we're requesting autonyms, although in
+               // this case (language that isn't defined by MediaWiki itself) that behavior seems wrong.
+               // 'en' must be an argument of getLanguageNames(): as the third argument of
+               // assertArrayNotHasKey() it would only be the failure message, and the hook would never
+               // run, which would make this assertion vacuous.
+               $this->assertArrayNotHasKey( 'sqsqsqsq', $this->getLanguageNames( 'en' ) );
+
+               $this->assertSame( '!!?!', $this->getLanguageName( 'sqsqsqsq', 'en' ) );
+       }
+
+       /**
+        * @dataProvider provideGetLanguageNames_sorted
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers Language::fetchLanguageNames
+        *
+        * @param mixed ...$args To pass to method
+        */
+       public function testGetLanguageNames_sorted( ...$args ) {
+               $actual = $this->getLanguageNames( ...$args );
+
+               // Arrays are copied on assignment, so sorting $expected leaves $actual as returned.
+               $expected = $actual;
+               ksort( $expected );
+
+               // Strict array comparison also compares element order.
+               $this->assertSame( $expected, $actual );
+       }
+
+       /**
+        * Data provider for testGetLanguageNames_sorted().
+        *
+        * Each case is the complete argument list handed to getLanguageNames(); the empty case
+        * calls it with no arguments at all.
+        *
+        * NOTE(review): the literal 'mw' is presumably the value of the DEFINED constant used by
+        * other providers in this trait -- confirm.
+        *
+        * @return array[]
+        */
+       public static function provideGetLanguageNames_sorted() {
+               return [
+                       [],
+                       [ AUTONYMS ],
+                       [ AUTONYMS, 'mw' ],
+                       [ AUTONYMS, ALL ],
+                       [ AUTONYMS, SUPPORTED ],
+                       [ 'he', 'mw' ],
+                       [ 'he', ALL ],
+                       [ 'he', SUPPORTED ],
+               ];
+       }
+
+       /**
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers Language::fetchLanguageNames
+        */
+       public function testGetLanguageNames_hookNotCalledForAutonyms() {
+               // Counts hook invocations; captured by reference so the closure can update it.
+               $hookCalls = 0;
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function () use ( &$hookCalls ) {
+                               $hookCalls += 1;
+                       }
+               );
+
+               $this->getLanguageNames();
+               $this->assertSame( 0, $hookCalls, 'Hook must not be called for autonyms' );
+
+               // We test elsewhere that the hook works, but the following verifies that our test is
+               // working and the counter isn't staying at zero above for some unrelated reason.
+               $this->getLanguageNames( 'fr' );
+               $this->assertSame( 1, $hookCalls, 'Hook must be called for non-autonyms' );
+       }
+
+       /**
+        * @dataProvider provideGetLanguageNames_pigLatin
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        *
+        * @param string $expected
+        * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
+        */
+       public function testGetLanguageNames_pigLatin( $expected, ...$otherArgs ) {
+               // Names the fake hook reports for pig Latin, keyed by the requested language.
+               $hookNames = [
+                       'fr' => [ 'en-x-piglatin' => 'latin de cochons' ],
+                       // Deliberately lowercase
+                       'en-x-piglatin' => [ 'en-x-piglatin' => 'igpay atinlay' ],
+               ];
+
+               $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
+                       function ( &$names, $inLanguage ) use ( $hookNames ) {
+                               // Any other language leaves $names untouched.
+                               if ( isset( $hookNames[$inLanguage] ) ) {
+                                       $names = $hookNames[$inLanguage];
+                               }
+                       }
+               );
+
+               $options = [ 'UsePigLatinVariant' => true ];
+               $this->assertGetLanguageNames( $options, $expected, 'en-x-piglatin', ...$otherArgs );
+       }
+
+       /**
+        * Data provider for testGetLanguageNames_pigLatin().
+        *
+        * The language code is fixed to 'en-x-piglatin' by the test itself, so each case is
+        * [ $expected, ...$otherArgs ], where the optional trailing elements are $inLanguage
+        * and/or $include.
+        *
+        * NOTE(review): in the last case the third element therefore ends up as $include rather
+        * than as a language code; it looks like a leftover from the [ $expected, $code,
+        * $inLanguage ] layout used by the other providers -- confirm whether it is intended.
+        *
+        * @return array[]
+        */
+       public static function provideGetLanguageNames_pigLatin() {
+               return [
+                       'Simple test' => [ 'Igpay Atinlay' ],
+                       'Not supported' => [ '', AUTONYMS, SUPPORTED ],
+                       'Foreign language' => [ 'latin de cochons', 'fr' ],
+                       'Hook doesn\'t override explicit autonym' =>
+                               [ 'Igpay Atinlay', 'en-x-piglatin', 'en-x-piglatin' ],
+               ];
+       }
+
+       /**
+        * Just for the sake of completeness, test that ExtraLanguageNames will not override the name
+        * for pig Latin. Nobody actually cares about this and if anything current behavior is probably
+        * wrong, but once we're testing the whole file we may as well be comprehensive.
+        *
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
+        * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
+        * @covers Language::fetchLanguageNames
+        * @covers Language::fetchLanguageName
+        */
+       public function testGetLanguageNames_pigLatinAndExtraLanguageNames() {
+               // The lowercase extra name is expected to lose against the capitalized name.
+               $options = [
+                       'UsePigLatinVariant' => true,
+                       'ExtraLanguageNames' => [ 'en-x-piglatin' => 'igpay atinlay' ],
+               ];
+
+               $this->assertGetLanguageNames( $options, 'Igpay Atinlay', 'en-x-piglatin' );
+       }
+
+       /**
+        * Wrapper that must be supplied by the class using this trait.
+        *
+        * Declared static because provideExceptionFromInvalidCode() calls it from a static
+        * context; testGetFileName() calls it through $this.
+        *
+        * @param mixed ...$args Judging by provideGetFileName(): prefix, language code and an
+        *   optional suffix -- confirm against the implementation
+        * @return string
+        */
+       abstract protected static function getFileName( ...$args );
+
+       /**
+        * @dataProvider provideGetFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getFileName
+        * @covers Language::getFileName
+        *
+        * @param string $expected
+        * @param mixed ...$args To pass to method
+        */
+       public function testGetFileName( $expected, ...$args ) {
+               // The provider's remaining elements are forwarded unchanged as the argument list.
+               $actual = $this->getFileName( ...$args );
+               $this->assertSame( $expected, $actual );
+       }
+
+       /**
+        * Data provider for testGetFileName().
+        *
+        * @return array[] Each case is [ $expected, ...$args ], where $args is the argument list
+        *   passed to getFileName()
+        */
+       public static function provideGetFileName() {
+               return [
+                       'Simple case' => [ 'MessagesXx.php', 'Messages', 'xx' ],
+                       'With extension' => [ 'MessagesXx.ext', 'Messages', 'xx', '.ext' ],
+                       'Replacing dashes' => [ '!__?', '!', '--', '?' ],
+                       'Empty prefix and extension' => [ 'Xx', '', 'xx', '' ],
+                       'Uppercase only first letter' => [ 'Messages_a.php', 'Messages', '-a' ],
+               ];
+       }
+
+       /**
+        * Wrapper that must be supplied by the class using this trait.
+        *
+        * NOTE(review): provideExceptionFromInvalidCode() refers to this method through the
+        * callable [ static::class, 'getMessagesFileName' ] although it is declared non-static
+        * here -- confirm that implementing classes cope with that.
+        *
+        * @param string $code Language code
+        * @return string Path that the tests compare against "$IP/languages/messages/..."
+        */
+       abstract protected function getMessagesFileName( $code );
+
+       /**
+        * @dataProvider provideGetMessagesFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
+        * @covers Language::getMessagesFileName
+        *
+        * @param string $code
+        * @param string $expected
+        */
+       public function testGetMessagesFileName( $code, $expected ) {
+               // Note the provider order: the code comes first, the expected path second.
+               $actual = $this->getMessagesFileName( $code );
+               $this->assertSame( $expected, $actual );
+       }
+
+       /**
+        * Data provider for testGetMessagesFileName().
+        *
+        * @return array[] Each case is [ $code, $expected ]
+        */
+       public static function provideGetMessagesFileName() {
+               global $IP;
+
+               // All expected paths share the same directory below the install path.
+               $dir = "$IP/languages/messages";
+
+               return [
+                       'Simple case' => [ 'en', "$dir/MessagesEn.php" ],
+                       'Replacing dashes' => [ '--', "$dir/Messages__.php" ],
+                       'Uppercase only first letter' => [ '-a', "$dir/Messages_a.php" ],
+               ];
+       }
+
+       /**
+        * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
+        * @covers Language::getMessagesFileName
+        */
+       public function testGetMessagesFileName_withHook() {
+               global $IP;
+
+               $hookCalls = 0;
+               // Path the hook should be offered before it gets a chance to replace it.
+               $defaultFile = "$IP/languages/messages/MessagesAb_cd.php";
+
+               $this->setTemporaryHook( 'Language::getMessagesFileName',
+                       function ( $code, &$file ) use ( &$hookCalls, $defaultFile ) {
+                               $hookCalls++;
+
+                               $this->assertSame( 'ab-cd', $code );
+                               $this->assertSame( $defaultFile, $file );
+                               // Replace the path so the outer assertion can tell the hook's value was used.
+                               $file = 'bye-bye';
+                       }
+               );
+
+               $this->assertSame( 'bye-bye', $this->getMessagesFileName( 'ab-cd' ) );
+               $this->assertSame( 1, $hookCalls );
+       }
+
+       /**
+        * Wrapper that must be supplied by the class using this trait.
+        *
+        * NOTE(review): provideExceptionFromInvalidCode() refers to this method through the
+        * callable [ static::class, 'getJsonMessagesFileName' ] although it is declared
+        * non-static here -- confirm that implementing classes cope with that.
+        *
+        * @param string $code Language code
+        * @return string Path that the test compares against "$IP/languages/i18n/<code>.json"
+        */
+       abstract protected function getJsonMessagesFileName( $code );
+
+       /**
+        * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName
+        * @covers Language::getJsonMessagesFileName
+        */
+       public function testGetJsonMessagesFileName() {
+               global $IP;
+
+               // Not so much to test here, one test seems to be enough
+               $code = 'en--123';
+               $this->assertSame(
+                       "$IP/languages/i18n/$code.json",
+                       $this->getJsonMessagesFileName( $code )
+               );
+       }
+
+       /**
+        * getFileName, getMessagesFileName, and getJsonMessagesFileName all throw if they get an
+        * invalid code. To save boilerplate, test them all in one method.
+        *
+        * @dataProvider provideExceptionFromInvalidCode
+        * @covers MediaWiki\Languages\LanguageNameUtils::getFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
+        * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName
+        * @covers Language::getFileName
+        * @covers Language::getMessagesFileName
+        * @covers Language::getJsonMessagesFileName
+        *
+        * @param callable $callback Will throw when passed $code
+        * @param string $code
+        */
+       public function testExceptionFromInvalidCode( $callback, $code ) {
+               // The expectation has to be registered before the call that is supposed to throw.
+               $this->setExpectedException( MWException::class, "Invalid language code \"$code\"" );
+
+               // $callback is either a closure or a [ class, method ] pair built by the provider.
+               $callback( $code );
+       }
+
+       /**
+        * Data provider for testExceptionFromInvalidCode().
+        *
+        * Produces three cases (one per file-name method) for every provideIsValidBuiltInCode()
+        * case whose second element is falsy.
+        *
+        * @return array[] Each case is [ callable $callback, $code ]
+        */
+       public static function provideExceptionFromInvalidCode() {
+               $cases = [];
+               foreach ( static::provideIsValidBuiltInCode() as $desc => list( $code, $valid ) ) {
+                       // Valid codes won't trigger an exception, so only invalid ones become cases.
+                       if ( !$valid ) {
+                               // For getFileName, we define an anonymous function because of the extra
+                               // first param
+                               $viaGetFileName = function ( $code ) {
+                                       return static::getFileName( 'Messages', $code );
+                               };
+
+                               $cases["getFileName: $desc"] = [ $viaGetFileName, $code ];
+                               $cases["getMessagesFileName: $desc"] =
+                                       [ [ static::class, 'getMessagesFileName' ], $code ];
+                               $cases["getJsonMessagesFileName: $desc"] =
+                                       [ [ static::class, 'getJsonMessagesFileName' ], $code ];
+                       }
+               }
+               return $cases;
+       }
+}