public $dateFormatStrings = [];
public $mExtendedSpecialPageAliases;
- protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
+ /** @var array|null */
+ protected $namespaceNames;
+ protected $mNamespaceIds, $namespaceAliases;
/**
* ReplacementArray object caches
*/
static private $fallbackLanguageCache = [];
+ /**
+ * Cache for grammar rules data
+ * @var MapCacheLRU|null
+ */
+ static private $grammarTransformations;
+
/**
* Cache for language names
* @var HashBagOStuff|null
// People think language codes are html safe, so enforce it.
// Ideally we should only allow a-zA-Z0-9-
// but, .+ and other chars are often used for {{int:}} hacks
- // see bugs 37564, 37587, 36938
+ // see bugs T39564, T39587, T38938
$cache[$code] =
// Protect against path traversal
strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code )
if ( is_null( $this->namespaceNames ) ) {
global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
- $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
$validNamespaces = MWNamespace::getCanonicalNamespaces();
- $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;
+ $this->namespaceNames = $wgExtraNamespaces +
+ self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
+ $this->namespaceNames += $validNamespaces;
$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
if ( $wgMetaNamespaceTalk ) {
/**
* @param string $key
- * @return array|null
+ * @return string|null
*/
public function getMessage( $key ) {
return self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
return $wgGrammarForms[$this->getCode()][$case][$word];
}
+ $grammarTransformations = $this->getGrammarTransformations();
+
+ if ( isset( $grammarTransformations[$case] ) ) {
+ $forms = $grammarTransformations[$case];
+
+ // Some names of grammar rules are aliases for other rules.
+ // In such cases the value is a string rather than object,
+ // so load the actual rules.
+ if ( is_string( $forms ) ) {
+ $forms = $grammarTransformations[$forms];
+ }
+
+ foreach ( array_values( $forms ) as $rule ) {
+ $form = $rule[0];
+
+ if ( $form === '@metadata' ) {
+ continue;
+ }
+
+ $replacement = $rule[1];
+
+ $regex = '/' . addcslashes( $form, '/' ) . '/u';
+ $patternMatches = preg_match( $regex, $word );
+
+ if ( $patternMatches === false ) {
+ wfLogWarning(
+ 'An error occurred while processing grammar. ' .
+ "Word: '$word'. Regex: /$form/."
+ );
+ } elseif ( $patternMatches === 1 ) {
+ $word = preg_replace( $regex, $replacement, $word );
+
+ break;
+ }
+ }
+ }
+
return $word;
}
+
/**
* Get the grammar forms for the content language
* @return array Array of grammar forms
return [];
}
+
+ /**
+ * Get the grammar transformations data for the language.
+ * Used like grammar forms, with {{GRAMMAR}} and cases,
+ * but uses pairs of regexes and replacements instead of code.
+ *
+ * The data is read from data/grammarTransformations/<code>.json, relative
+ * to this file's directory, and kept in a static per-language-code cache.
+ * When no readable data file exists, an empty array is returned; that
+ * empty result is cached too, so the file system is not probed again on
+ * every call for such languages.
+ *
+ * @return array[] Array of grammar transformations, keyed by grammar case;
+ *  empty when the language has no readable data file.
+ * @throws MWException If the data file is readable but does not decode
+ *  to an array.
+ * @since 1.28
+ */
+ public function getGrammarTransformations() {
+ $languageCode = $this->getCode();
+
+ if ( self::$grammarTransformations === null ) {
+ self::$grammarTransformations = new MapCacheLRU( 10 );
+ }
+
+ if ( self::$grammarTransformations->has( $languageCode ) ) {
+ return self::$grammarTransformations->get( $languageCode );
+ }
+
+ $data = [];
+
+ $grammarDataFile = __DIR__ . "/data/grammarTransformations/$languageCode.json";
+ if ( is_readable( $grammarDataFile ) ) {
+ $data = FormatJson::decode(
+ file_get_contents( $grammarDataFile ),
+ true
+ );
+
+ // Reject anything that is not an array (null on a decoding failure,
+ // or a scalar top-level JSON value): callers index the result by
+ // grammar case.
+ if ( !is_array( $data ) ) {
+ throw new MWException( "Invalid grammar data for \"$languageCode\"." );
+ }
+ }
+
+ // Cache the outcome even when it is empty, so that languages without a
+ // data file do not trigger a file system check on every call.
+ self::$grammarTransformations->set( $languageCode, $data );
+
+ return $data;
+ }
+
/**
* Provides an alternative text depending on specified gender.
* Usage {{gender:username|masculine|feminine|unknown}}.