From e3537bdc00244cefdaa34710be0fb0d920336ed8 Mon Sep 17 00:00:00 2001 From: Aryeh Gregor Date: Thu, 2 May 2019 17:23:42 +0300 Subject: [PATCH] Split some Language methods to LanguageNameUtils These are static methods that have to do with processing language names and codes. I didn't include fallback behavior, because that would mean a circular dependency with LocalisationCache. In the new class, I renamed AS_AUTONYMS to AUTONYMS, and added a class constant DEFINED for 'mw' to match the existing SUPPORTED and ALL. I also renamed fetchLanguageName(s) to getLanguageName(s). There is 100% test coverage for the code in the new class. This was previously committed as 2e52f48c2ed and reverted because it depended on e4468a1d6b6, which had to be reverted for performance issues. There should be no changes other than rebasing. Bug: T201405 Change-Id: Ifa346c8a92bf1eb57dc5e79458b32b7b26f1ee8a (cherry picked from commit 6d80b6c0827401cf8e41589bf134147bb0aa407f) --- RELEASE-NOTES-1.34 | 5 + autoload.php | 1 + includes/MediaWikiServices.php | 9 + includes/ServiceWiring.php | 11 +- .../cache/localisation/LocalisationCache.php | 15 +- includes/language/LanguageCode.php | 1 - includes/language/LanguageNameUtils.php | 319 ++++++++++ languages/Language.php | 211 ++----- languages/data/Names.php | 2 +- maintenance/rebuildLocalisationCache.php | 3 +- tests/common/TestsAutoLoader.php | 1 + tests/phpunit/MediaWikiUnitTestCase.php | 3 +- .../includes/api/ApiQuerySiteinfoTest.php | 1 + .../includes/cache/LocalisationCacheTest.php | 36 +- tests/phpunit/languages/LanguageTest.php | 196 +++---- .../language/LanguageNameUtilsTest.php | 66 +++ .../language/LanguageNameUtilsTestTrait.php | 555 ++++++++++++++++++ 17 files changed, 1165 insertions(+), 270 deletions(-) create mode 100644 includes/language/LanguageNameUtils.php create mode 100644 tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php create mode 100644 tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php diff 
--git a/RELEASE-NOTES-1.34 b/RELEASE-NOTES-1.34 index e98af0acd2..e808078e01 100644 --- a/RELEASE-NOTES-1.34 +++ b/RELEASE-NOTES-1.34 @@ -607,6 +607,11 @@ because of Phabricator reports. * RESTBagOStuff users should specify either "JSON" or "PHP" serialization type. * Language::getLocalisationCache() is deprecated. Use MediaWikiServices instead. +* The following Language methods are deprecated: isSupportedLanguage, + isValidCode, isValidBuiltInCode, isKnownLanguageTag, fetchLanguageNames, + fetchLanguageName, getFileName, getMessagesFileName, getJsonMessagesFileName. + Use the new LanguageNameUtils class instead. (Note that fetchLanguageName(s) + are called getLanguageName(s) in the new class.) === Other changes in 1.34 === * Added option to specify "Various authors" as author in extension credits using diff --git a/autoload.php b/autoload.php index dc57ff6ed9..b2147a50a0 100644 --- a/autoload.php +++ b/autoload.php @@ -881,6 +881,7 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Languages\\Data\\CrhExceptions' => __DIR__ . '/languages/data/CrhExceptions.php', 'MediaWiki\\Languages\\Data\\Names' => __DIR__ . '/languages/data/Names.php', 'MediaWiki\\Languages\\Data\\ZhConversion' => __DIR__ . '/languages/data/ZhConversion.php', + 'MediaWiki\\Languages\\LanguageNameUtils' => __DIR__ . '/includes/language/LanguageNameUtils.php', 'MediaWiki\\Logger\\ConsoleLogger' => __DIR__ . '/includes/debug/logger/ConsoleLogger.php', 'MediaWiki\\Logger\\ConsoleSpi' => __DIR__ . '/includes/debug/logger/ConsoleSpi.php', 'MediaWiki\\Logger\\LegacyLogger' => __DIR__ . 
'/includes/debug/logger/LegacyLogger.php', diff --git a/includes/MediaWikiServices.php b/includes/MediaWikiServices.php index a32fbefc7a..3214e6af6e 100644 --- a/includes/MediaWikiServices.php +++ b/includes/MediaWikiServices.php @@ -21,6 +21,7 @@ use MediaWiki\FileBackend\FSFile\TempFSFileFactory; use MediaWiki\FileBackend\LockManager\LockManagerGroupFactory; use MediaWiki\Http\HttpRequestFactory; use PasswordReset; +use MediaWiki\Languages\LanguageNameUtils; use Wikimedia\Message\IMessageFormatterFactory; use MediaWiki\Page\MovePageFactory; use MediaWiki\Permissions\PermissionManager; @@ -627,6 +628,14 @@ class MediaWikiServices extends ServiceContainer { return $this->getService( 'InterwikiLookup' ); } + /** + * @since 1.34 + * @return LanguageNameUtils + */ + public function getLanguageNameUtils() { + return $this->getService( 'LanguageNameUtils' ); + } + /** * @since 1.28 * @return LinkCache diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index f6d0ec06c9..ed47b5b268 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -56,6 +56,7 @@ use MediaWiki\FileBackend\LockManager\LockManagerGroupFactory; use MediaWiki\Http\HttpRequestFactory; use MediaWiki\Interwiki\ClassicInterwikiLookup; use MediaWiki\Interwiki\InterwikiLookup; +use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\Linker\LinkRenderer; use MediaWiki\Linker\LinkRendererFactory; use MediaWiki\Logger\LoggerFactory; @@ -259,6 +260,13 @@ return [ ); }, + 'LanguageNameUtils' => function ( MediaWikiServices $services ) : LanguageNameUtils { + return new LanguageNameUtils( new ServiceOptions( + LanguageNameUtils::$constructorOptions, + $services->getMainConfig() + ) ); + }, + 'LinkCache' => function ( MediaWikiServices $services ) : LinkCache { return new LinkCache( $services->getTitleFormatter(), @@ -313,7 +321,8 @@ return [ $logger, [ function () use ( $services ) { $services->getResourceLoader()->getMessageBlobStore()->clear(); - } ] + } ], + 
$services->getLanguageNameUtils() ); }, diff --git a/includes/cache/localisation/LocalisationCache.php b/includes/cache/localisation/LocalisationCache.php index a9e69697d6..49b2a4786e 100644 --- a/includes/cache/localisation/LocalisationCache.php +++ b/includes/cache/localisation/LocalisationCache.php @@ -23,6 +23,7 @@ use CLDRPluralRuleParser\Evaluator; use CLDRPluralRuleParser\Error as CLDRPluralRuleError; use MediaWiki\Config\ServiceOptions; +use MediaWiki\Languages\LanguageNameUtils; use Psr\Log\LoggerInterface; /** @@ -73,6 +74,9 @@ class LocalisationCache { /** @var callable[] See comment for parameter in constructor */ private $clearStoreCallbacks; + /** @var LanguageNameUtils */ + private $langNameUtils; + /** * A 2-d associative array, code/key, where presence indicates that the item * is loaded. Value arbitrary. @@ -244,13 +248,15 @@ class LocalisationCache { * @param callable[] $clearStoreCallbacks To be called whenever the cache is cleared. Can be * used to clear other caches that depend on this one, such as ResourceLoader's * MessageBlobStore. 
+ * @param LanguageNameUtils $langNameUtils * @throws MWException */ function __construct( ServiceOptions $options, LCStore $store, LoggerInterface $logger, - array $clearStoreCallbacks = [] + array $clearStoreCallbacks, + LanguageNameUtils $langNameUtils ) { $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); @@ -258,6 +264,7 @@ class LocalisationCache { $this->store = $store; $this->logger = $logger; $this->clearStoreCallbacks = $clearStoreCallbacks; + $this->langNameUtils = $langNameUtils; // Keep this separate from $this->options so it can be mutable $this->manualRecache = $options->get( 'manualRecache' ); @@ -470,7 +477,7 @@ class LocalisationCache { $this->initialisedLangs[$code] = true; # If the code is of the wrong form for a Messages*.php file, do a shallow fallback - if ( !Language::isValidBuiltInCode( $code ) ) { + if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) { $this->initShallowFallback( $code, 'en' ); return; @@ -478,7 +485,7 @@ class LocalisationCache { # Recache the data if necessary if ( !$this->manualRecache && $this->isExpired( $code ) ) { - if ( Language::isSupportedLanguage( $code ) ) { + if ( $this->langNameUtils->isSupportedLanguage( $code ) ) { $this->recache( $code ); } elseif ( $code === 'en' ) { throw new MWException( 'MessagesEn.php is missing.' ); @@ -707,7 +714,7 @@ class LocalisationCache { global $IP; // This reads in the PHP i18n file with non-messages l10n data - $fileName = Language::getMessagesFileName( $code ); + $fileName = $this->langNameUtils->getMessagesFileName( $code ); if ( !file_exists( $fileName ) ) { $data = []; } else { diff --git a/includes/language/LanguageCode.php b/includes/language/LanguageCode.php index 7d954d3803..1d2f0b4e83 100644 --- a/includes/language/LanguageCode.php +++ b/includes/language/LanguageCode.php @@ -21,7 +21,6 @@ /** * Methods for dealing with language codes. 
- * @todo Move some of the code-related static methods out of Language into this class * * @since 1.29 * @ingroup Language diff --git a/includes/language/LanguageNameUtils.php b/includes/language/LanguageNameUtils.php new file mode 100644 index 0000000000..08d9ab3e0d --- /dev/null +++ b/includes/language/LanguageNameUtils.php @@ -0,0 +1,319 @@ +assertRequiredOptions( self::$constructorOptions ); + $this->options = $options; + } + + /** + * Checks whether any localisation is available for that language tag in MediaWiki + * (MessagesXx.php or xx.json exists). + * + * @param string $code Language tag (in lower case) + * @return bool Whether language is supported + */ + public function isSupportedLanguage( $code ) { + if ( !$this->isValidBuiltInCode( $code ) ) { + return false; + } + + if ( $code === 'qqq' ) { + // Special code for internal use, not supported even though there is a qqq.json + return false; + } + + return is_readable( $this->getMessagesFileName( $code ) ) || + is_readable( $this->getJsonMessagesFileName( $code ) ); + } + + /** + * Returns true if a language code string is of a valid form, whether or not it exists. This + * includes codes which are used solely for customisation via the MediaWiki namespace. + * + * @param string $code + * + * @return bool + */ + public function isValidCode( $code ) { + Assert::parameterType( 'string', $code, '$code' ); + if ( !isset( $this->validCodeCache[$code] ) ) { + // People think language codes are HTML-safe, so enforce it. Ideally we should only + // allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs + // T39564, T39587, T38938. 
+ $this->validCodeCache[$code] = + // Protect against path traversal + strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) && + !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code ); + } + return $this->validCodeCache[$code]; + } + + /** + * Returns true if a language code is of a valid form for the purposes of internal customisation + * of MediaWiki, via Messages*.php or *.json. + * + * @param string $code + * @return bool + */ + public function isValidBuiltInCode( $code ) { + Assert::parameterType( 'string', $code, '$code' ); + + return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + } + + /** + * Returns true if a language code is an IETF tag known to MediaWiki. + * + * @param string $tag + * + * @return bool + */ + public function isKnownLanguageTag( $tag ) { + // Quick escape for invalid input to avoid exceptions down the line when code tries to + // process tags which are not valid at all. + if ( !$this->isValidBuiltInCode( $tag ) ) { + return false; + } + + if ( isset( Data\Names::$names[$tag] ) || $this->getLanguageName( $tag, $tag ) !== '' ) { + return true; + } + + return false; + } + + /** + * Get an array of language names, indexed by code. + * @param null|string $inLanguage Code of language in which to return the names + * Use self::AUTONYMS for autonyms (native names) + * @param string $include One of: + * self::ALL all available languages + * self::DEFINED only if the language is defined in MediaWiki or wgExtraLanguageNames + * (default) + * self::SUPPORTED only if the language is in self::DEFINED *and* has a message file + * @return array Language code => language name (sorted by key) + */ + public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) { + $cacheKey = $inLanguage === self::AUTONYMS ? 
'null' : $inLanguage; + $cacheKey .= ":$include"; + if ( !$this->languageNameCache ) { + $this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] ); + } + + $ret = $this->languageNameCache->get( $cacheKey ); + if ( !$ret ) { + $ret = $this->getLanguageNamesUncached( $inLanguage, $include ); + $this->languageNameCache->set( $cacheKey, $ret ); + } + return $ret; + } + + /** + * Uncached helper for getLanguageNames + * @param null|string $inLanguage As getLanguageNames + * @param string $include As getLanguageNames + * @return array Language code => language name (sorted by key) + */ + private function getLanguageNamesUncached( $inLanguage, $include ) { + // If passed an invalid language code to use, fallback to en + if ( $inLanguage !== self::AUTONYMS && !$this->isValidCode( $inLanguage ) ) { + $inLanguage = 'en'; + } + + $names = []; + + if ( $inLanguage !== self::AUTONYMS ) { + # TODO: also include for self::AUTONYMS, when this code is more efficient + Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] ); + } + + $mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names; + if ( $this->options->get( 'UsePigLatinVariant' ) ) { + // Pig Latin (for variant development) + $mwNames['en-x-piglatin'] = 'Igpay Atinlay'; + } + + foreach ( $mwNames as $mwCode => $mwName ) { + # - Prefer own MediaWiki native name when not using the hook + # - For other names just add if not added through the hook + if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) { + $names[$mwCode] = $mwName; + } + } + + if ( $include === self::ALL ) { + ksort( $names ); + return $names; + } + + $returnMw = []; + $coreCodes = array_keys( $mwNames ); + foreach ( $coreCodes as $coreCode ) { + $returnMw[$coreCode] = $names[$coreCode]; + } + + if ( $include === self::SUPPORTED ) { + $namesMwFile = []; + # We do this using a foreach over the codes instead of a directory loop so that messages + # files in extensions will work correctly. 
+ foreach ( $returnMw as $code => $value ) { + if ( is_readable( $this->getMessagesFileName( $code ) ) || + is_readable( $this->getJsonMessagesFileName( $code ) ) + ) { + $namesMwFile[$code] = $names[$code]; + } + } + + ksort( $namesMwFile ); + return $namesMwFile; + } + + ksort( $returnMw ); + # self::DEFINED option; default if it's not one of the other two options + # (self::ALL/self::SUPPORTED) + return $returnMw; + } + + /** + * @param string $code The code of the language for which to get the name + * @param null|string $inLanguage Code of language in which to return the name (self::AUTONYMS + * for autonyms) + * @param string $include See getLanguageNames(), except this defaults to self::ALL instead of + * self::DEFINED + * @return string Language name or empty + * @since 1.20 + */ + public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) { + $code = strtolower( $code ); + $array = $this->getLanguageNames( $inLanguage, $include ); + return $array[$code] ?? ''; + } + + /** + * Get the name of a file for a certain language code + * @param string $prefix Prepend this to the filename + * @param string $code Language code + * @param string $suffix Append this to the filename + * @throws MWException + * @return string $prefix . $mangledCode . $suffix + */ + public function getFileName( $prefix, $code, $suffix = '.php' ) { + if ( !$this->isValidBuiltInCode( $code ) ) { + throw new MWException( "Invalid language code \"$code\"" ); + } + + return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . 
$suffix; + } + + /** + * @param string $code + * @return string + */ + public function getMessagesFileName( $code ) { + global $IP; + $file = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' ); + Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] ); + return $file; + } + + /** + * @param string $code + * @return string + * @throws MWException + */ + public function getJsonMessagesFileName( $code ) { + global $IP; + + if ( !$this->isValidBuiltInCode( $code ) ) { + throw new MWException( "Invalid language code \"$code\"" ); + } + + return "$IP/languages/i18n/$code.json"; + } +} diff --git a/languages/Language.php b/languages/Language.php index a8950f5a13..39b5f73ae4 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -27,8 +27,8 @@ */ use CLDRPluralRuleParser\Evaluator; +use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\MediaWikiServices; -use Wikimedia\Assert\Assert; /** * Internationalisation code @@ -38,21 +38,24 @@ class Language { /** * Return autonyms in fetchLanguageName(s). * @since 1.32 + * @deprecated since 1.34, use LanguageNameUtils::AUTONYMS */ - const AS_AUTONYMS = null; + const AS_AUTONYMS = LanguageNameUtils::AUTONYMS; /** * Return all known languages in fetchLanguageName(s). * @since 1.32 + * @deprecated since 1.34, use LanguageNameUtils::ALL */ - const ALL = 'all'; + const ALL = LanguageNameUtils::ALL; /** * Return in fetchLanguageName(s) only the languages for which we have at * least some localisation. 
* @since 1.32 + * @deprecated since 1.34, use LanguageNameUtils::SUPPORTED */ - const SUPPORTED = 'mwfile'; + const SUPPORTED = LanguageNameUtils::SUPPORTED; /** * @var LanguageConverter|FakeConverter @@ -80,6 +83,9 @@ class Language { /** @var LocalisationCache */ private $localisationCache; + /** @var LanguageNameUtils */ + private $langNameUtils; + public static $mLangObjCache = []; /** @@ -94,6 +100,7 @@ class Language { */ const STRICT_FALLBACKS = 1; + // TODO Make these const once we drop HHVM support (T192166) public static $mWeekdayMsgs = [ 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday' @@ -178,12 +185,6 @@ class Language { */ private static $grammarTransformations; - /** - * Cache for language names - * @var HashBagOStuff|null - */ - private static $languageNameCache; - /** * Unicode directional formatting characters, for embedBidi() */ @@ -239,11 +240,12 @@ class Language { * @return Language */ protected static function newFromCode( $code, $fallback = false ) { - if ( !self::isValidCode( $code ) ) { + $langNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils(); + if ( !$langNameUtils->isValidCode( $code ) ) { throw new MWException( "Invalid language code \"$code\"" ); } - if ( !self::isValidBuiltInCode( $code ) ) { + if ( !$langNameUtils->isValidBuiltInCode( $code ) ) { // It's not possible to customise this code with class files, so // just return a Language object. This is to support uselang= hacks. 
$lang = new Language; @@ -262,7 +264,7 @@ class Language { // Keep trying the fallback list until we find an existing class $fallbacks = self::getFallbacksFor( $code ); foreach ( $fallbacks as $fallbackCode ) { - if ( !self::isValidBuiltInCode( $fallbackCode ) ) { + if ( !$langNameUtils->isValidBuiltInCode( $fallbackCode ) ) { throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" ); } @@ -288,32 +290,25 @@ class Language { } if ( defined( 'MW_PHPUNIT_TEST' ) ) { MediaWikiServices::getInstance()->resetServiceForTesting( 'LocalisationCache' ); + MediaWikiServices::getInstance()->resetServiceForTesting( 'LanguageNameUtils' ); } self::$mLangObjCache = []; self::$fallbackLanguageCache = []; self::$grammarTransformations = null; - self::$languageNameCache = null; } /** * Checks whether any localisation is available for that language tag * in MediaWiki (MessagesXx.php exists). * + * @deprecated since 1.34, use LanguageNameUtils * @param string $code Language tag (in lower case) * @return bool Whether language is supported * @since 1.21 */ public static function isSupportedLanguage( $code ) { - if ( !self::isValidBuiltInCode( $code ) ) { - return false; - } - - if ( $code === 'qqq' ) { - return false; - } - - return is_readable( self::getMessagesFileName( $code ) ) || - is_readable( self::getJsonMessagesFileName( $code ) ); + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->isSupportedLanguage( $code ); } /** @@ -381,63 +376,45 @@ class Language { * not it exists. This includes codes which are used solely for * customisation via the MediaWiki namespace. * + * @deprecated since 1.34, use LanguageNameUtils + * * @param string $code * * @return bool */ public static function isValidCode( $code ) { - static $cache = []; - Assert::parameterType( 'string', $code, '$code' ); - if ( !isset( $cache[$code] ) ) { - // People think language codes are html safe, so enforce it. 
- // Ideally we should only allow a-zA-Z0-9- - // but, .+ and other chars are often used for {{int:}} hacks - // see bugs T39564, T39587, T38938 - $cache[$code] = - // Protect against path traversal - strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) - && !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code ); - } - return $cache[$code]; + return MediaWikiServices::getInstance()->getLanguageNameUtils()->isValidCode( $code ); } /** * Returns true if a language code is of a valid form for the purposes of * internal customisation of MediaWiki, via Messages*.php or *.json. * + * @deprecated since 1.34, use LanguageNameUtils + * * @param string $code * * @since 1.18 * @return bool */ public static function isValidBuiltInCode( $code ) { - Assert::parameterType( 'string', $code, '$code' ); - - return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->isValidBuiltInCode( $code ); } /** * Returns true if a language code is an IETF tag known to MediaWiki. * + * @deprecated since 1.34, use LanguageNameUtils + * * @param string $tag * * @since 1.21 * @return bool */ public static function isKnownLanguageTag( $tag ) { - // Quick escape for invalid input to avoid exceptions down the line - // when code tries to process tags which are not valid at all. 
- if ( !self::isValidBuiltInCode( $tag ) ) { - return false; - } - - if ( isset( MediaWiki\Languages\Data\Names::$names[$tag] ) - || self::fetchLanguageName( $tag, $tag ) !== '' - ) { - return true; - } - - return false; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->isKnownLanguageTag( $tag ); } /** @@ -458,7 +435,9 @@ class Language { } else { $this->mCode = str_replace( '_', '-', strtolower( substr( static::class, 8 ) ) ); } - $this->localisationCache = MediaWikiServices::getInstance()->getLocalisationCache(); + $services = MediaWikiServices::getInstance(); + $this->localisationCache = $services->getLocalisationCache(); + $this->langNameUtils = $services->getLanguageNameUtils(); } /** @@ -764,7 +743,7 @@ class Language { if ( $usemsg && wfMessage( $msg )->exists() ) { return $this->getMessageFromDB( $msg ); } - $name = self::fetchLanguageName( $code ); + $name = $this->langNameUtils->getLanguageName( $code ); if ( $name ) { return $name; # if it's defined as a language name, show that } else { @@ -825,6 +804,8 @@ class Language { /** * Get an array of language names, indexed by code. + * + * @deprecated since 1.34, use LanguageNameUtils::getLanguageNames * @param null|string $inLanguage Code of language in which to return the names * Use self::AS_AUTONYMS for autonyms (native names) * @param string $include One of: @@ -835,95 +816,12 @@ class Language { * @since 1.20 */ public static function fetchLanguageNames( $inLanguage = self::AS_AUTONYMS, $include = 'mw' ) { - $cacheKey = $inLanguage === self::AS_AUTONYMS ? 
'null' : $inLanguage; - $cacheKey .= ":$include"; - if ( self::$languageNameCache === null ) { - self::$languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] ); - } - - $ret = self::$languageNameCache->get( $cacheKey ); - if ( !$ret ) { - $ret = self::fetchLanguageNamesUncached( $inLanguage, $include ); - self::$languageNameCache->set( $cacheKey, $ret ); - } - return $ret; - } - - /** - * Uncached helper for fetchLanguageNames - * @param null|string $inLanguage Code of language in which to return the names - * Use self::AS_AUTONYMS for autonyms (native names) - * @param string $include One of: - * self::ALL all available languages - * 'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default) - * self::SUPPORTED only if the language is in 'mw' *and* has a message file - * @return array Language code => language name (sorted by key) - */ - private static function fetchLanguageNamesUncached( - $inLanguage = self::AS_AUTONYMS, - $include = 'mw' - ) { - global $wgExtraLanguageNames, $wgUsePigLatinVariant; - - // If passed an invalid language code to use, fallback to en - if ( $inLanguage !== self::AS_AUTONYMS && !self::isValidCode( $inLanguage ) ) { - $inLanguage = 'en'; - } - - $names = []; - - if ( $inLanguage ) { - # TODO: also include when $inLanguage is null, when this code is more efficient - Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] ); - } - - $mwNames = $wgExtraLanguageNames + MediaWiki\Languages\Data\Names::$names; - if ( $wgUsePigLatinVariant ) { - // Pig Latin (for variant development) - $mwNames['en-x-piglatin'] = 'Igpay Atinlay'; - } - - foreach ( $mwNames as $mwCode => $mwName ) { - # - Prefer own MediaWiki native name when not using the hook - # - For other names just add if not added through the hook - if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) { - $names[$mwCode] = $mwName; - } - } - - if ( $include === self::ALL ) { - ksort( $names ); - return $names; - } - - $returnMw = 
[]; - $coreCodes = array_keys( $mwNames ); - foreach ( $coreCodes as $coreCode ) { - $returnMw[$coreCode] = $names[$coreCode]; - } - - if ( $include === self::SUPPORTED ) { - $namesMwFile = []; - # We do this using a foreach over the codes instead of a directory - # loop so that messages files in extensions will work correctly. - foreach ( $returnMw as $code => $value ) { - if ( is_readable( self::getMessagesFileName( $code ) ) - || is_readable( self::getJsonMessagesFileName( $code ) ) - ) { - $namesMwFile[$code] = $names[$code]; - } - } - - ksort( $namesMwFile ); - return $namesMwFile; - } - - ksort( $returnMw ); - # 'mw' option; default if it's not one of the other two options (all/mwfile) - return $returnMw; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getLanguageNames( $inLanguage, $include ); } /** + * @deprecated since 1.34, use LanguageNameUtils::getLanguageName * @param string $code The code of the language for which to get the name * @param null|string $inLanguage Code of language in which to return the name * (SELF::AS_AUTONYMS for autonyms) @@ -936,9 +834,8 @@ class Language { $inLanguage = self::AS_AUTONYMS, $include = self::ALL ) { - $code = strtolower( $code ); - $array = self::fetchLanguageNames( $inLanguage, $include ); - return !array_key_exists( $code, $array ) ? '' : $array[$code]; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getLanguageName( $code, $inLanguage, $include ); } /** @@ -4444,6 +4341,8 @@ class Language { /** * Get the name of a file for a certain language code + * + * @deprecated since 1.34, use LanguageNameUtils * @param string $prefix Prepend this to the filename * @param string $code Language code * @param string $suffix Append this to the filename @@ -4451,38 +4350,30 @@ class Language { * @return string $prefix . $mangledCode . 
$suffix */ public static function getFileName( $prefix, $code, $suffix = '.php' ) { - if ( !self::isValidBuiltInCode( $code ) ) { - throw new MWException( "Invalid language code \"$code\"" ); - } - - return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getFileName( $prefix, $code, $suffix ); } /** + * @deprecated since 1.34, use LanguageNameUtils * @param string $code * @return string */ public static function getMessagesFileName( $code ) { - global $IP; - $file = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' ); - Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] ); - return $file; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getMessagesFileName( $code ); } /** + * @deprecated since 1.34, use LanguageNameUtils * @param string $code * @return string * @throws MWException * @since 1.23 */ public static function getJsonMessagesFileName( $code ) { - global $IP; - - if ( !self::isValidBuiltInCode( $code ) ) { - throw new MWException( "Invalid language code \"$code\"" ); - } - - return "$IP/languages/i18n/$code.json"; + return MediaWikiServices::getInstance()->getLanguageNameUtils() + ->getJsonMessagesFileName( $code ); } /** diff --git a/languages/data/Names.php b/languages/data/Names.php index 783a211cab..2ae38d51e1 100644 --- a/languages/data/Names.php +++ b/languages/data/Names.php @@ -39,7 +39,7 @@ namespace MediaWiki\Languages\Data; * If you are adding support for such a language, add it also to * the relevant section in shared.css. * - * Do not use this class directly. Use Language::fetchLanguageNames(), which + * Do not use this class directly. Use LanguageNameUtils::getLanguageNames(), which * includes support for the CLDR extension. 
* * @ingroup Language diff --git a/maintenance/rebuildLocalisationCache.php b/maintenance/rebuildLocalisationCache.php index 07c55698df..e5f799d7b3 100644 --- a/maintenance/rebuildLocalisationCache.php +++ b/maintenance/rebuildLocalisationCache.php @@ -97,7 +97,8 @@ class RebuildLocalisationCache extends Maintenance { [ function () { MediaWikiServices::getInstance()->getResourceLoader() ->getMessageBlobStore()->clear(); - } ] + } ], + MediaWikiServices::getInstance()->getLanguageNameUtils() ); $allCodes = array_keys( Language::fetchLanguageNames( null, 'mwfile' ) ); diff --git a/tests/common/TestsAutoLoader.php b/tests/common/TestsAutoLoader.php index 1657e81852..5968aed829 100644 --- a/tests/common/TestsAutoLoader.php +++ b/tests/common/TestsAutoLoader.php @@ -225,6 +225,7 @@ $wgAutoloadClasses += [ # tests/phpunit/unit/includes/language 'LanguageFallbackTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageFallbackTestTrait.php", + 'LanguageNameUtilsTestTrait' => "$testDir/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php", # tests/phpunit/unit/includes/libs/filebackend/fsfile 'TempFSFileTestTrait' => "$testDir/phpunit/unit/includes/libs/filebackend/fsfile/TempFSFileTestTrait.php", diff --git a/tests/phpunit/MediaWikiUnitTestCase.php b/tests/phpunit/MediaWikiUnitTestCase.php index fda986ccc8..c63056da2d 100644 --- a/tests/phpunit/MediaWikiUnitTestCase.php +++ b/tests/phpunit/MediaWikiUnitTestCase.php @@ -53,7 +53,8 @@ abstract class MediaWikiUnitTestCase extends TestCase { 'wgAutoloadLocalClasses', // Need for LoggerFactory. Default is NullSpi. 
'wgMWLoggerDefaultSpi', - 'wgAutoloadAttemptLowercase' + 'wgAutoloadAttemptLowercase', + 'wgLegalTitleChars' ]; } diff --git a/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php b/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php index 7f5ee0ca3c..d1e8e84fbf 100644 --- a/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php +++ b/tests/phpunit/includes/api/ApiQuerySiteinfoTest.php @@ -191,6 +191,7 @@ class ApiQuerySiteinfoTest extends ApiTestCase { 'wgExtraInterlanguageLinkPrefixes' => [ 'self' ], 'wgExtraLanguageNames' => [ 'self' => 'Recursion' ], ] ); + $this->resetServices(); MessageCache::singleton()->enable(); diff --git a/tests/phpunit/includes/cache/LocalisationCacheTest.php b/tests/phpunit/includes/cache/LocalisationCacheTest.php index af1ff86515..4dd819a61b 100644 --- a/tests/phpunit/includes/cache/LocalisationCacheTest.php +++ b/tests/phpunit/includes/cache/LocalisationCacheTest.php @@ -1,6 +1,7 @@ createMock( LanguageNameUtils::class ); + $mockLangNameUtils->method( 'isValidBuiltInCode' )->will( $this->returnCallback( + function ( $code ) { + // Copy-paste, but it's only one line + return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + } + ) ); + $mockLangNameUtils->method( 'isSupportedLanguage' )->will( $this->returnCallback( + function ( $code ) { + return in_array( $code, [ + 'ar', + 'arz', + 'ba', + 'de', + 'en', + 'ksh', + 'ru', + ] ); + } + ) ); + $mockLangNameUtils->method( 'getMessagesFileName' )->will( $this->returnCallback( + function ( $code ) { + global $IP; + $code = str_replace( '-', '_', ucfirst( $code ) ); + return "$IP/languages/messages/Messages$code.php"; + } + ) ); + $mockLangNameUtils->expects( $this->never() )->method( $this->anythingBut( + 'isValidBuiltInCode', 'isSupportedLanguage', 'getMessagesFileName' + ) ); + $lc = $this->getMockBuilder( LocalisationCache::class ) ->setConstructorArgs( [ new ServiceOptions( LocalisationCache::CONSTRUCTOR_OPTIONS, [ @@ -33,7 +65,9 @@ class LocalisationCacheTest extends MediaWikiTestCase { 
'MessagesDirs' => [], ] ), new LCStoreDB( [] ), - new NullLogger + new NullLogger, + [], + $mockLangNameUtils ] ) ->setMethods( [ 'getMessagesDirs' ] ) ->getMock(); diff --git a/tests/phpunit/languages/LanguageTest.php b/tests/phpunit/languages/LanguageTest.php index c443f20e0b..68dfd37bbc 100644 --- a/tests/phpunit/languages/LanguageTest.php +++ b/tests/phpunit/languages/LanguageTest.php @@ -3,6 +3,24 @@ use Wikimedia\TestingAccessWrapper; class LanguageTest extends LanguageClassesTestCase { + use LanguageNameUtilsTestTrait; + + /** @var array Copy of $wgHooks from before we unset LanguageGetTranslatedLanguageNames */ + private $origHooks; + + public function setUp() { + global $wgHooks; + + parent::setUp(); + + // Don't allow installed hooks to run, except if a test restores them via origHooks (needed + // for testIsKnownLanguageTag_cldr) + $this->origHooks = $wgHooks; + $newHooks = $wgHooks; + unset( $newHooks['LanguageGetTranslatedLanguageNames'] ); + $this->setMwGlobals( 'wgHooks', $newHooks ); + } + /** * @covers Language::convertDoubleWidth * @covers Language::normalizeForSearch @@ -510,84 +528,6 @@ class LanguageTest extends LanguageClassesTestCase { ); } - /** - * Test Language::isValidBuiltInCode() - * @dataProvider provideLanguageCodes - * @covers Language::isValidBuiltInCode - */ - public function testBuiltInCodeValidation( $code, $expected, $message = '' ) { - $this->assertEquals( $expected, - (bool)Language::isValidBuiltInCode( $code ), - "validating code $code $message" - ); - } - - public static function provideLanguageCodes() { - return [ - [ 'fr', true, 'Two letters, minor case' ], - [ 'EN', false, 'Two letters, upper case' ], - [ 'tyv', true, 'Three letters' ], - [ 'be-tarask', true, 'With dash' ], - [ 'be-x-old', true, 'With extension (two dashes)' ], - [ 'be_tarask', false, 'Reject underscores' ], - ]; - } - - /** - * Test Language::isKnownLanguageTag() - * @dataProvider provideKnownLanguageTags - * @covers Language::isKnownLanguageTag - */ - 
public function testKnownLanguageTag( $code, $message = '' ) { - $this->assertTrue( - (bool)Language::isKnownLanguageTag( $code ), - "validating code $code - $message" - ); - } - - public static function provideKnownLanguageTags() { - return [ - [ 'fr', 'simple code' ], - [ 'bat-smg', 'an MW legacy tag' ], - [ 'sgs', 'an internal standard MW name, for which a legacy tag is used externally' ], - ]; - } - - /** - * @covers Language::isKnownLanguageTag - */ - public function testKnownCldrLanguageTag() { - if ( !class_exists( 'LanguageNames' ) ) { - $this->markTestSkipped( 'The LanguageNames class is not available. ' - . 'The CLDR extension is probably not installed.' ); - } - - $this->assertTrue( - (bool)Language::isKnownLanguageTag( 'pal' ), - 'validating code "pal" an ancient language, which probably will ' - . 'not appear in Names.php, but appears in CLDR in English' - ); - } - - /** - * Negative tests for Language::isKnownLanguageTag() - * @dataProvider provideUnKnownLanguageTags - * @covers Language::isKnownLanguageTag - */ - public function testUnknownLanguageTag( $code, $message = '' ) { - $this->assertFalse( - (bool)Language::isKnownLanguageTag( $code ), - "checking that code $code is invalid - $message" - ); - } - - public static function provideUnknownLanguageTags() { - return [ - [ 'mw', 'non-existent two-letter code' ], - [ 'foo"getGrammarTransformations(); $this->assertNotNull( $languageClass->grammarTransformations ); - // Populate $languageNameCache - Language::fetchLanguageNames(); - $this->assertNotNull( $languageClass->languageNameCache ); - Language::clearCaches(); $this->assertCount( 0, Language::$mLangObjCache ); $this->assertCount( 0, $languageClass->fallbackLanguageCache ); $this->assertNull( $languageClass->grammarTransformations ); - $this->assertNull( $languageClass->languageNameCache ); - } - - /** - * @dataProvider provideIsSupportedLanguage - * @covers Language::isSupportedLanguage - */ - public function testIsSupportedLanguage( $code, 
$expected, $comment ) { - $this->assertEquals( $expected, Language::isSupportedLanguage( $code ), $comment ); - } - - public static function provideIsSupportedLanguage() { - return [ - [ 'en', true, 'is supported language' ], - [ 'fi', true, 'is supported language' ], - [ 'bunny', false, 'is not supported language' ], - [ 'FI', false, 'is not supported language, input should be in lower case' ], - ]; } /** @@ -1956,4 +1874,82 @@ class LanguageTest extends LanguageClassesTestCase { [ 'èl', 'Ll' , 'Non-ASCII is overridden', [ 'è' => 'L' ] ], ]; } + + // The following methods are for LanguageNameUtilsTestTrait + + private function isSupportedLanguage( $code ) { + return Language::isSupportedLanguage( $code ); + } + + private function isValidCode( $code ) { + return Language::isValidCode( $code ); + } + + private function isValidBuiltInCode( $code ) { + return Language::isValidBuiltInCode( $code ); + } + + private function isKnownLanguageTag( $code ) { + return Language::isKnownLanguageTag( $code ); + } + + /** + * Call getLanguageName() and getLanguageNames() using the Language static methods. + * + * @param array $options To set globals for testing Language + * @param string $expected + * @param string $code + * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. + */ + private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) { + if ( $options ) { + foreach ( $options as $key => $val ) { + $this->setMwGlobals( "wg$key", $val ); + } + $this->resetServices(); + } + $this->assertSame( $expected, + Language::fetchLanguageNames( ...$otherArgs )[strtolower( $code )] ?? 
'' ); + $this->assertSame( $expected, Language::fetchLanguageName( $code, ...$otherArgs ) ); + } + + private function getLanguageNames( ...$args ) { + return Language::fetchLanguageNames( ...$args ); + } + + private function getLanguageName( ...$args ) { + return Language::fetchLanguageName( ...$args ); + } + + private static function getFileName( ...$args ) { + return Language::getFileName( ...$args ); + } + + private static function getMessagesFileName( $code ) { + return Language::getMessagesFileName( $code ); + } + + private static function getJsonMessagesFileName( $code ) { + return Language::getJsonMessagesFileName( $code ); + } + + /** + * @todo This really belongs in the cldr extension's tests. + * + * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag + * @covers Language::isKnownLanguageTag + */ + public function testIsKnownLanguageTag_cldr() { + if ( !class_exists( 'LanguageNames' ) ) { + $this->markTestSkipped( 'The LanguageNames class is not available. ' + . 'The CLDR extension is probably not installed.' ); + } + + // We need to restore the extension's hook that we removed. 
+ $this->setMwGlobals( 'wgHooks', $this->origHooks ); + + // "pal" is an ancient language, which probably will not appear in Names.php, but appears in + // CLDR in English + $this->assertTrue( Language::isKnownLanguageTag( 'pal' ) ); + } } diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php new file mode 100644 index 0000000000..6fbd4a2863 --- /dev/null +++ b/tests/phpunit/unit/includes/language/LanguageNameUtilsTest.php @@ -0,0 +1,66 @@ + [], + 'LanguageCode' => 'en', + 'UsePigLatinVariant' => false, + ] + ) ); + } + + use LanguageNameUtilsTestTrait; + + private function isSupportedLanguage( $code ) { + return $this->newObj()->isSupportedLanguage( $code ); + } + + private function isValidCode( $code ) { + return $this->newObj()->isValidCode( $code ); + } + + private function isValidBuiltInCode( $code ) { + return $this->newObj()->isValidBuiltInCode( $code ); + } + + private function isKnownLanguageTag( $code ) { + return $this->newObj()->isKnownLanguageTag( $code ); + } + + private function assertGetLanguageNames( array $options, $expected, $code, ...$otherArgs ) { + $this->assertSame( $expected, $this->newObj( $options ) + ->getLanguageNames( ...$otherArgs )[strtolower( $code )] ?? 
'' ); + $this->assertSame( $expected, + $this->newObj( $options )->getLanguageName( $code, ...$otherArgs ) ); + } + + private function getLanguageNames( ...$args ) { + return $this->newObj()->getLanguageNames( ...$args ); + } + + private function getLanguageName( ...$args ) { + return $this->newObj()->getLanguageName( ...$args ); + } + + private static function getFileName( ...$args ) { + return self::newObj()->getFileName( ...$args ); + } + + private static function getMessagesFileName( $code ) { + return self::newObj()->getMessagesFileName( $code ); + } + + private static function getJsonMessagesFileName( $code ) { + return self::newObj()->getJsonMessagesFileName( $code ); + } +} diff --git a/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php b/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php new file mode 100644 index 0000000000..bd777e9a58 --- /dev/null +++ b/tests/phpunit/unit/includes/language/LanguageNameUtilsTestTrait.php @@ -0,0 +1,555 @@ +assertSame( $expected, $this->isSupportedLanguage( $code ) ); + } + + public static function provideIsSupportedLanguage() { + return [ + 'en' => [ 'en', true ], + 'fi' => [ 'fi', true ], + 'bunny' => [ 'bunny', false ], + 'qqq' => [ 'qqq', false ], + 'uppercase is not considered supported' => [ 'FI', false ], + ]; + } + + abstract protected function isValidCode( $code ); + + /** + * We don't test that the result is cached, because that should only be noticeable if the + * configuration changes in between calls, and 1) that should never happen in normal operation, + * 2) if you do it you deserve whatever you get, and 3) once the static Language method is + * dropped and the invalid title regex is moved to something injected instead of a static call, + * the cache will be undetectable. + * + * @todo Should we test changes to $wgLegalTitleChars here? Does anybody actually change that? + * Is it possible to change it usefully without breaking everything? 
+ * + * @dataProvider provideIsValidCode + * @covers MediaWiki\Languages\LanguageNameUtils::isValidCode + * @covers Language::isValidCode + * + * @param string $code + * @param bool $expected + */ + public function testIsValidCode( $code, $expected ) { + $this->assertSame( $expected, $this->isValidCode( $code ) ); + } + + public static function provideIsValidCode() { + $ret = [ + 'en' => [ 'en', true ], + 'en-GB' => [ 'en-GB', true ], + 'Funny chars' => [ "%!$()*,-.;=?@^_`~\x80\xA2\xFF+", true ], + 'Percent escape not allowed' => [ 'a%aF', false ], + 'Percent with only one following char is okay' => [ '%a', true ], + 'Percent with non-hex following chars is okay' => [ '%AG', true ], + 'Named char reference "a"' => [ 'a&a', false ], + 'Named char reference "A"' => [ 'a&A', false ], + 'Named char reference "0"' => [ 'a&0', false ], + 'Named char reference non-ASCII' => [ "a&\x92", false ], + 'Numeric char reference' => [ "a&#0", false ], + 'Hex char reference 0' => [ "a&#x0", false ], + 'Hex char reference A' => [ "a&#xA", false ], + 'Lone ampersand is valid for title but not lang code' => [ '&', false ], + 'Ampersand followed by just # is valid for title but not lang code' => [ '&#', false ], + 'Ampersand followed by # and non-x/digit is valid for title but not lang code' => + [ '&#a', false ], + ]; + $disallowedChars = ":/\\\000&<>'\""; + foreach ( str_split( $disallowedChars ) as $char ) { + $ret["Disallowed character $char"] = [ "a{$char}a", false ]; + } + return $ret; + } + + abstract protected function isValidBuiltInCode( $code ); + + /** + * @dataProvider provideIsValidBuiltInCode + * @covers MediaWiki\Languages\LanguageNameUtils::isValidBuiltInCode + * @covers Language::isValidBuiltInCode + * + * @param string $code + * @param bool $expected + */ + public function testIsValidBuiltInCode( $code, $expected ) { + $this->assertSame( $expected, $this->isValidBuiltInCode( $code ) ); + } + + public static function provideIsValidBuiltInCode() { + return [ + 'Two letters,
lowercase' => [ 'fr', true ], + 'Two letters, uppercase' => [ 'EN', false ], + 'Three letters' => [ 'tyv', true ], + 'With dash' => [ 'be-tarask', true ], + 'With extension (two dashes)' => [ 'be-x-old', true ], + 'Reject underscores' => [ 'be_tarask', false ], + 'One letter' => [ 'a', false ], + 'Only digits' => [ '00', true ], + 'Only dashes' => [ '--', true ], + 'Unreasonably long' => [ str_repeat( 'x', 100 ), true ], + 'qqq' => [ 'qqq', true ], + ]; + } + + abstract protected function isKnownLanguageTag( $code ); + + /** + * @dataProvider provideIsKnownLanguageTag + * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag + * @covers Language::isKnownLanguageTag + * + * @param string $code + * @param bool $expected + */ + public function testIsKnownLanguageTag( $code, $expected ) { + $this->assertSame( $expected, $this->isKnownLanguageTag( $code ) ); + } + + public static function provideIsKnownLanguageTag() { + $invalidBuiltInCodes = array_filter( static::provideIsValidBuiltInCode(), + function ( $arr ) { + // If isValidBuiltInCode() returns false, we want to also, but if it returns true, + // we could still return false from isKnownLanguageTag(), so skip those. + return !$arr[1]; + } + ); + return array_merge( $invalidBuiltInCodes, [ + 'Simple code' => [ 'fr', true ], + 'An MW legacy tag' => [ 'bat-smg', true ], + 'An internal standard MW name, for which a legacy tag is used externally' => + [ 'sgs', true ], + 'Non-existent two-letter code' => [ 'mw', false ], + 'Very invalid language code' => [ 'foo"assertGetLanguageNames( [], $expected, $code, ...$otherArgs ); + } + + public static function provideGetLanguageNames() { + // @todo There are probably lots of interesting tests to add here. 
+ return [ + 'Simple code' => [ 'Deutsch', 'de' ], + 'Simple code in a different language (doesn\'t work without hook)' => + [ 'Deutsch', 'de', 'fr' ], + 'Invalid code' => [ '', '&' ], + 'Pig Latin not enabled' => [ '', 'en-x-piglatin', AUTONYMS, ALL ], + 'qqq doesn\'t have a name' => [ '', 'qqq', AUTONYMS, ALL ], + 'An MW legacy tag is recognized' => [ 'žemaitėška', 'bat-smg' ], + // @todo Is the next test's result desired? + 'An MW legacy tag is not supported' => [ '', 'bat-smg', AUTONYMS, SUPPORTED ], + 'An internal standard name, for which a legacy tag is used externally, is supported' => + [ 'žemaitėška', 'sgs', AUTONYMS, SUPPORTED ], + ]; + } + + /** + * @dataProvider provideGetLanguageNames_withHook + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + * + * @param string $expected Expected return value of getLanguageName() + * @param string $code + * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. 
+ */ + public function testGetLanguageNames_withHook( $expected, $code, ...$otherArgs ) { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names, $inLanguage ) { + switch ( $inLanguage ) { + case 'de': + $names = [ + 'de' => 'Deutsch', + 'en' => 'Englisch', + 'fr' => 'Französisch', + ]; + break; + + case 'en': + $names = [ + 'de' => 'German', + 'en' => 'English', + 'fr' => 'French', + 'sqsqsqsq' => '!!?!', + 'bat-smg' => 'Samogitian', + ]; + break; + + case 'fr': + $names = [ + 'de' => 'allemand', + 'en' => 'anglais', + // Deliberate mistake (no cedilla) + 'fr' => 'francais', + ]; + break; + } + } + ); + + // Really we could dispense with assertGetLanguageNames() and just call + // testGetLanguageNames() here, but it looks weird to call a test method from another test + // method. + $this->assertGetLanguageNames( [], $expected, $code, ...$otherArgs ); + } + + public static function provideGetLanguageNames_withHook() { + return [ + 'Simple code in a different language' => [ 'allemand', 'de', 'fr' ], + 'Invalid inLanguage defaults to English' => [ 'German', 'de', '&' ], + 'If inLanguage not provided, default to autonym' => [ 'Deutsch', 'de' ], + 'Hooks ignored for explicitly-requested autonym' => [ 'français', 'fr', 'fr' ], + 'Hooks don\'t make a language supported' => [ '', 'bat-smg', 'en', SUPPORTED ], + 'Hooks don\'t make a language defined' => [ '', 'sqsqsqsq', 'en', DEFINED ], + 'Hooks do make a language name returned with ALL' => [ '!!?!', 'sqsqsqsq', 'en', ALL ], + ]; + } + + /** + * @dataProvider provideGetLanguageNames_ExtraLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + * + * @param string $expected Expected return value of getLanguageName() + * @param string $code + * 
@param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. + */ + public function testGetLanguageNames_ExtraLanguageNames( $expected, $code, ...$otherArgs ) { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names ) { + $names['de'] = 'die deutsche Sprache'; + } + ); + $this->assertGetLanguageNames( + [ 'ExtraLanguageNames' => [ 'de' => 'deutsche Sprache', 'sqsqsqsq' => '!!?!' ] ], + $expected, $code, ...$otherArgs + ); + } + + public static function provideGetLanguageNames_ExtraLanguageNames() { + return [ + 'Simple extra language name' => [ '!!?!', 'sqsqsqsq' ], + 'Extra language is defined' => [ '!!?!', 'sqsqsqsq', AUTONYMS, DEFINED ], + 'Extra language is not supported' => [ '', 'sqsqsqsq', AUTONYMS, SUPPORTED ], + 'Extra language overrides default' => [ 'deutsche Sprache', 'de' ], + 'Extra language overrides hook for explicitly requested autonym' => + [ 'deutsche Sprache', 'de', 'de' ], + 'Hook overrides extra language for non-autonym' => + [ 'die deutsche Sprache', 'de', 'fr' ], + ]; + } + + /** + * Test that getLanguageNames() defaults to DEFINED, and getLanguageName() defaults to ALL. + * + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + */ + public function testGetLanguageNames_parameterDefault() { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names ) { + $names = [ 'sqsqsqsq' => '!!?!' ]; + } + ); + + // We use 'en' here because the hook is not run if we're requesting autonyms, although in + // this case (language that isn't defined by MediaWiki itself) that behavior seems wrong. 
+ $this->assertArrayNotHasKey( 'sqsqsqsq', $this->getLanguageNames( 'en' ) ); + + $this->assertSame( '!!?!', $this->getLanguageName( 'sqsqsqsq', 'en' ) ); + } + + /** + * @dataProvider provideGetLanguageNames_sorted + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers Language::fetchLanguageNames + * + * @param mixed ...$args To pass to method + */ + public function testGetLanguageNames_sorted( ...$args ) { + $names = $this->getLanguageNames( ...$args ); + $sortedNames = $names; + ksort( $sortedNames ); + $this->assertSame( $sortedNames, $names ); + } + + public static function provideGetLanguageNames_sorted() { + return [ + [], + [ AUTONYMS ], + [ AUTONYMS, 'mw' ], + [ AUTONYMS, ALL ], + [ AUTONYMS, SUPPORTED ], + [ 'he', 'mw' ], + [ 'he', ALL ], + [ 'he', SUPPORTED ], + ]; + } + + /** + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers Language::fetchLanguageNames + */ + public function testGetLanguageNames_hookNotCalledForAutonyms() { + $count = 0; + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function () use ( &$count ) { + $count++; + } + ); + + $this->getLanguageNames(); + $this->assertSame( 0, $count, 'Hook must not be called for autonyms' ); + + // We test elsewhere that the hook works, but the following verifies that our test is + // working and $count isn't being incremented above only because we're checking autonyms.
+ $this->getLanguageNames( 'fr' ); + $this->assertSame( 1, $count, 'Hook must be called for non-autonyms' ); + } + + /** + * @dataProvider provideGetLanguageNames_pigLatin + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + * + * @param string $expected + * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. + */ + public function testGetLanguageNames_pigLatin( $expected, ...$otherArgs ) { + $this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames', + function ( &$names, $inLanguage ) { + switch ( $inLanguage ) { + case 'fr': + $names = [ 'en-x-piglatin' => 'latin de cochons' ]; + break; + + case 'en-x-piglatin': + // Deliberately lowercase + $names = [ 'en-x-piglatin' => 'igpay atinlay' ]; + break; + } + } + ); + + $this->assertGetLanguageNames( + [ 'UsePigLatinVariant' => true ], $expected, 'en-x-piglatin', ...$otherArgs ); + } + + public static function provideGetLanguageNames_pigLatin() { + return [ + 'Simple test' => [ 'Igpay Atinlay' ], + 'Not supported' => [ '', AUTONYMS, SUPPORTED ], + 'Foreign language' => [ 'latin de cochons', 'fr' ], + 'Hook doesn\'t override explicit autonym' => + [ 'Igpay Atinlay', 'en-x-piglatin', 'en-x-piglatin' ], + ]; + } + + /** + * Just for the sake of completeness, test that ExtraLanguageNames will not override the name + * for pig Latin. Nobody actually cares about this and if anything current behavior is probably + * wrong, but once we're testing the whole file we may as well be comprehensive. 
+ * + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached + * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName + * @covers Language::fetchLanguageNames + * @covers Language::fetchLanguageName + */ + public function testGetLanguageNames_pigLatinAndExtraLanguageNames() { + $this->assertGetLanguageNames( + [ + 'UsePigLatinVariant' => true, + 'ExtraLanguageNames' => [ 'en-x-piglatin' => 'igpay atinlay' ] + ], + 'Igpay Atinlay', + 'en-x-piglatin' + ); + } + + abstract protected static function getFileName( ...$args ); + + /** + * @dataProvider provideGetFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getFileName + * @covers Language::getFileName + * + * @param string $expected + * @param mixed ...$args To pass to method + */ + public function testGetFileName( $expected, ...$args ) { + $this->assertSame( $expected, $this->getFileName( ...$args ) ); + } + + public static function provideGetFileName() { + return [ + 'Simple case' => [ 'MessagesXx.php', 'Messages', 'xx' ], + 'With extension' => [ 'MessagesXx.ext', 'Messages', 'xx', '.ext' ], + 'Replacing dashes' => [ '!__?', '!', '--', '?' 
], + 'Empty prefix and extension' => [ 'Xx', '', 'xx', '' ], + 'Uppercase only first letter' => [ 'Messages_a.php', 'Messages', '-a' ], + ]; + } + + abstract protected static function getMessagesFileName( $code ); + + /** + * @dataProvider provideGetMessagesFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName + * @covers Language::getMessagesFileName + * + * @param string $code + * @param string $expected + */ + public function testGetMessagesFileName( $code, $expected ) { + $this->assertSame( $expected, $this->getMessagesFileName( $code ) ); + } + + public static function provideGetMessagesFileName() { + global $IP; + return [ + 'Simple case' => [ 'en', "$IP/languages/messages/MessagesEn.php" ], + 'Replacing dashes' => [ '--', "$IP/languages/messages/Messages__.php" ], + 'Uppercase only first letter' => [ '-a', "$IP/languages/messages/Messages_a.php" ], + ]; + } + + /** + * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName + * @covers Language::getMessagesFileName + */ + public function testGetMessagesFileName_withHook() { + $called = 0; + + $this->setTemporaryHook( 'Language::getMessagesFileName', + function ( $code, &$file ) use ( &$called ) { + global $IP; + + $called++; + + $this->assertSame( 'ab-cd', $code ); + $this->assertSame( "$IP/languages/messages/MessagesAb_cd.php", $file ); + $file = 'bye-bye'; + } + ); + + $this->assertSame( 'bye-bye', $this->getMessagesFileName( 'ab-cd' ) ); + $this->assertSame( 1, $called ); + } + + abstract protected static function getJsonMessagesFileName( $code ); + + /** + * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName + * @covers Language::getJsonMessagesFileName + */ + public function testGetJsonMessagesFileName() { + global $IP; + + // Not so much to test here, one test seems to be enough + $expected = "$IP/languages/i18n/en--123.json"; + $this->assertSame( $expected, $this->getJsonMessagesFileName( 'en--123' ) ); + } + + /** + * getFileName, getMessagesFileName, and
getJsonMessagesFileName all throw if they get an + * invalid code. To save boilerplate, test them all in one method. + * + * @dataProvider provideExceptionFromInvalidCode + * @covers MediaWiki\Languages\LanguageNameUtils::getFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName + * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName + * @covers Language::getFileName + * @covers Language::getMessagesFileName + * @covers Language::getJsonMessagesFileName + * + * @param callable $callback Will throw when passed $code + * @param string $code + */ + public function testExceptionFromInvalidCode( $callback, $code ) { + $this->setExpectedException( MWException::class, "Invalid language code \"$code\"" ); + + $callback( $code ); + } + + public static function provideExceptionFromInvalidCode() { + $ret = []; + foreach ( static::provideIsValidBuiltInCode() as $desc => list( $code, $valid ) ) { + if ( $valid ) { + // Won't get an exception from this one + continue; + } + + // For getFileName, we define an anonymous function because of the extra first param + $ret["getFileName: $desc"] = [ + function ( $code ) { + return static::getFileName( 'Messages', $code ); + }, + $code + ]; + + $ret["getMessagesFileName: $desc"] = + [ [ static::class, 'getMessagesFileName' ], $code ]; + + $ret["getJsonMessagesFileName: $desc"] = + [ [ static::class, 'getJsonMessagesFileName' ], $code ]; + } + return $ret; + } +} -- 2.20.1