Merge "Split some Language methods to LanguageNameUtils"
[lhc/web/wiklou.git] / includes / language / LanguageNameUtils.php
1 <?php
2 /**
3 * Internationalisation code.
4 * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Language
23 */
24
25 /**
26 * @defgroup Language Language
27 */
28
29 namespace MediaWiki\Languages;
30
31 use HashBagOStuff;
32 use Hooks;
33 use MediaWiki\Config\ServiceOptions;
34 use MediaWikiTitleCodec;
35 use MWException;
36 use Wikimedia\Assert\Assert;
37
38 /**
39 * @ingroup Language
40 *
41 * A service that provides utilities to do with language names and codes.
42 *
43 * @since 1.34
44 */
class LanguageNameUtils {
	/**
	 * Return autonyms in getLanguageName(s).
	 */
	const AUTONYMS = null;

	/**
	 * Return all known languages in getLanguageName(s).
	 */
	const ALL = 'all';

	/**
	 * Return in getLanguageName(s) only the languages that are defined by MediaWiki.
	 */
	const DEFINED = 'mw';

	/**
	 * Return in getLanguageName(s) only the languages for which we have at least some localisation.
	 */
	const SUPPORTED = 'mwfile';

	/** @var ServiceOptions */
	private $options;

	/**
	 * Cache for language names
	 * @var HashBagOStuff|null
	 */
	private $languageNameCache;

	/**
	 * Cache for validity of language codes, keyed by code
	 * @var bool[]
	 */
	private $validCodeCache = [];

	/**
	 * Names of the options that the ServiceOptions object given to the constructor must provide.
	 * @var string[]
	 */
	public static $constructorOptions = [
		'ExtraLanguageNames',
		'UsePigLatinVariant',
	];

	/**
	 * @param ServiceOptions $options Must provide every option listed in self::$constructorOptions
	 */
	public function __construct( ServiceOptions $options ) {
		$options->assertRequiredOptions( self::$constructorOptions );
		$this->options = $options;
	}

	/**
	 * Checks whether any localisation is available for that language tag in MediaWiki
	 * (MessagesXx.php or xx.json exists).
	 *
	 * @param string $code Language tag (in lower case)
	 * @return bool Whether language is supported
	 */
	public function isSupportedLanguage( $code ) {
		if ( !$this->isValidBuiltInCode( $code ) ) {
			return false;
		}

		if ( $code === 'qqq' ) {
			// Special code for internal use, not supported even though there is a qqq.json
			return false;
		}

		return $this->hasLocalisationFile( $code );
	}

	/**
	 * Returns true if a language code string is of a valid form, whether or not it exists. This
	 * includes codes which are used solely for customisation via the MediaWiki namespace.
	 *
	 * Results are memoised per code in $this->validCodeCache.
	 *
	 * @param string $code
	 *
	 * @return bool
	 */
	public function isValidCode( $code ) {
		Assert::parameterType( 'string', $code, '$code' );
		if ( !isset( $this->validCodeCache[$code] ) ) {
			// People think language codes are HTML-safe, so enforce it. Ideally we should only
			// allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs
			// T39564, T39587, T38938.
			$this->validCodeCache[$code] =
				// Protect against path traversal: reject colon, slash, backslash, NUL,
				// ampersand, angle brackets and both kinds of quote.
				strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) &&
				!preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );
		}
		return $this->validCodeCache[$code];
	}

	/**
	 * Returns true if a language code is of a valid form for the purposes of internal customisation
	 * of MediaWiki, via Messages*.php or *.json.
	 *
	 * A valid built-in code consists of two or more characters, each a lower-case ASCII letter,
	 * a digit or a hyphen.
	 *
	 * @param string $code
	 * @return bool
	 */
	public function isValidBuiltInCode( $code ) {
		Assert::parameterType( 'string', $code, '$code' );

		// The D modifier makes "$" match only at the very end of the subject. Without it "$" also
		// matches just before a trailing newline, so "en\n" would be accepted and end up in the
		// file names built by getFileName() and getJsonMessagesFileName().
		return (bool)preg_match( '/^[a-z0-9-]{2,}$/D', $code );
	}

	/**
	 * Returns true if a language code is an IETF tag known to MediaWiki.
	 *
	 * @param string $tag
	 *
	 * @return bool
	 */
	public function isKnownLanguageTag( $tag ) {
		// Quick escape for invalid input to avoid exceptions down the line when code tries to
		// process tags which are not valid at all.
		if ( !$this->isValidBuiltInCode( $tag ) ) {
			return false;
		}

		// Known if it is one of the built-in names, or if a name for it can be found when
		// asking for the language's name in the language itself.
		return isset( Data\Names::$names[$tag] ) || $this->getLanguageName( $tag, $tag ) !== '';
	}

	/**
	 * Get an array of language names, indexed by code.
	 *
	 * Results are kept in an in-process cache keyed by the requested language and inclusion mode.
	 *
	 * @param null|string $inLanguage Code of language in which to return the names
	 *   Use self::AUTONYMS for autonyms (native names)
	 * @param string $include One of:
	 *   self::ALL all available languages
	 *   self::DEFINED only if the language is defined in MediaWiki or wgExtraLanguageNames
	 *     (default)
	 *   self::SUPPORTED only if the language is in self::DEFINED *and* has a message file
	 * @return array Language code => language name (sorted by key)
	 */
	public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) {
		// self::AUTONYMS is null, so spell it out to get a usable string key
		$cacheKey = $inLanguage === self::AUTONYMS ? 'null' : $inLanguage;
		$cacheKey .= ":$include";
		if ( !$this->languageNameCache ) {
			// Created lazily on first use
			$this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
		}

		$ret = $this->languageNameCache->get( $cacheKey );
		if ( !$ret ) {
			// Nothing usable in the cache: compute the list and remember it
			$ret = $this->getLanguageNamesUncached( $inLanguage, $include );
			$this->languageNameCache->set( $cacheKey, $ret );
		}
		return $ret;
	}

	/**
	 * Uncached helper for getLanguageNames
	 * @param null|string $inLanguage As getLanguageNames
	 * @param string $include As getLanguageNames
	 * @return array Language code => language name (sorted by key)
	 */
	private function getLanguageNamesUncached( $inLanguage, $include ) {
		// If passed an invalid language code to use, fallback to en
		if ( $inLanguage !== self::AUTONYMS && !$this->isValidCode( $inLanguage ) ) {
			$inLanguage = 'en';
		}

		$names = [];

		if ( $inLanguage !== self::AUTONYMS ) {
			# TODO: also include for self::AUTONYMS, when this code is more efficient
			Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
		}

		// Array union: entries from ExtraLanguageNames win over the built-in names
		$mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names;
		if ( $this->options->get( 'UsePigLatinVariant' ) ) {
			// Pig Latin (for variant development)
			$mwNames['en-x-piglatin'] = 'Igpay Atinlay';
		}

		foreach ( $mwNames as $mwCode => $mwName ) {
			# - Prefer own MediaWiki native name when not using the hook
			# - For other names just add if not added through the hook
			if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
				$names[$mwCode] = $mwName;
			}
		}

		if ( $include === self::ALL ) {
			ksort( $names );
			return $names;
		}

		// Restrict to the codes MediaWiki itself defines, keeping whichever name won above
		$returnMw = [];
		foreach ( array_keys( $mwNames ) as $coreCode ) {
			$returnMw[$coreCode] = $names[$coreCode];
		}

		if ( $include === self::SUPPORTED ) {
			$namesMwFile = [];
			# We do this using a foreach over the codes instead of a directory loop so that messages
			# files in extensions will work correctly.
			foreach ( array_keys( $returnMw ) as $code ) {
				// PHP converts integer-like string keys to ints; cast back so that the string
				// type assertion in isValidBuiltInCode() is satisfied.
				$code = (string)$code;
				// A code that is not a valid built-in code cannot have a messages file, and the
				// file name getters throw MWException for it. Skip it instead of letting a single
				// malformed ExtraLanguageNames entry abort the whole listing.
				if ( $this->isValidBuiltInCode( $code ) && $this->hasLocalisationFile( $code ) ) {
					$namesMwFile[$code] = $names[$code];
				}
			}

			ksort( $namesMwFile );
			return $namesMwFile;
		}

		ksort( $returnMw );
		# self::DEFINED option; default if it's not one of the other two options
		# (self::ALL/self::SUPPORTED)
		return $returnMw;
	}

	/**
	 * @param string $code The code of the language for which to get the name
	 * @param null|string $inLanguage Code of language in which to return the name (self::AUTONYMS
	 *   for autonyms)
	 * @param string $include See getLanguageNames(), except this defaults to self::ALL instead of
	 *   self::DEFINED
	 * @return string Language name or empty
	 * @since 1.20
	 */
	public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
		// The lookup is done on the lower-cased code
		$code = strtolower( $code );
		$array = $this->getLanguageNames( $inLanguage, $include );
		return $array[$code] ?? '';
	}

	/**
	 * Get the name of a file for a certain language code
	 *
	 * The code is mangled by upper-casing its first character and replacing hyphens with
	 * underscores.
	 *
	 * @param string $prefix Prepend this to the filename
	 * @param string $code Language code
	 * @param string $suffix Append this to the filename
	 * @throws MWException If $code is not a valid built-in code
	 * @return string $prefix . $mangledCode . $suffix
	 */
	public function getFileName( $prefix, $code, $suffix = '.php' ) {
		if ( !$this->isValidBuiltInCode( $code ) ) {
			throw new MWException( "Invalid language code \"$code\"" );
		}

		return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
	}

	/**
	 * Get the path of the Messages*.php file for a language code. The
	 * 'Language::getMessagesFileName' hook is run and may replace the path.
	 *
	 * @param string $code
	 * @return string
	 * @throws MWException If $code is not a valid built-in code
	 */
	public function getMessagesFileName( $code ) {
		global $IP;
		$file = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' );
		Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] );
		return $file;
	}

	/**
	 * Get the path of the JSON messages file for a language code.
	 *
	 * @param string $code
	 * @return string
	 * @throws MWException If $code is not a valid built-in code
	 */
	public function getJsonMessagesFileName( $code ) {
		global $IP;

		if ( !$this->isValidBuiltInCode( $code ) ) {
			throw new MWException( "Invalid language code \"$code\"" );
		}

		return "$IP/languages/i18n/$code.json";
	}

	/**
	 * Whether a readable Messages*.php or *.json messages file exists for a language code.
	 *
	 * @param string $code A code for which isValidBuiltInCode() returns true
	 * @return bool
	 * @throws MWException If $code is not a valid built-in code
	 */
	private function hasLocalisationFile( $code ) {
		return is_readable( $this->getMessagesFileName( $code ) ) ||
			is_readable( $this->getJsonMessagesFileName( $code ) );
	}
}