return new IcuCollation( 'root' );
case 'xx-uca-ckb':
return new CollationCkb;
+ case 'xx-uca-et':
+ return new CollationEt;
default:
$match = array();
if ( preg_match( '/^uca-([a-z@=-]+)$/', $collationName, $match ) ) {
/** @var Collator */
private $mainCollator;
- /** @var */
+ /** @var string */
private $locale;
/** @var Language */
'el' => array(),
'eo' => array( "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ),
'es' => array( "Ñ" ),
- 'et' => array( "Š", "Ž", "Õ", "Ä", "Ö", "Ü" ),
+ 'et' => array( "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ), // added W for CollationEt (xx-uca-et)
'eu' => array( "Ñ" ),
'fo' => array( "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ),
'fur' => array( "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ),
}
$cache = wfGetCache( CACHE_ANYTHING );
- $cacheKey = wfMemcKey( 'first-letters', $this->locale, $this->digitTransformLanguage->getCode() );
+ $cacheKey = wfMemcKey(
+ 'first-letters',
+ $this->locale,
+ $this->digitTransformLanguage->getCode(),
+ self::getICUVersion()
+ );
$cacheEntry = $cache->get( $cacheKey );
if ( $cacheEntry && isset( $cacheEntry['version'] )
$this->digitTransformLanguage = Language::factory( 'ckb' );
}
}
+
+/**
+ * Estonian ('et') collation with a workaround for how ICU sorts 'W' (bug 54168).
+ *
+ * Modern Estonian treats 'W' and 'V' as distinct letters for collation purposes, whereas the
+ * ICU tailoring for Estonian gives them the same primary weight. To keep them apart, every 'W'
+ * and 'w' is swapped for 'ᴡ' (U+1D21 'LATIN LETTER SMALL CAPITAL W') before the text is handed
+ * to ICU: that character is collated like 'W', but is not tailored to share a primary weight
+ * with 'V' in Estonian.
+ */
+class CollationEt extends IcuCollation {
+	function __construct() {
+		// This collation is always backed by the Estonian locale.
+		parent::__construct( 'et' );
+	}
+
+	/**
+	 * Swap every 'w' and 'W' for the stand-in character.
+	 *
+	 * @param string $string Text about to be passed to the parent collation
+	 * @return string The text with 'w' and 'W' replaced by U+1D21
+	 */
+	private static function mangle( $string ) {
+		// U+1D21 'LATIN LETTER SMALL CAPITAL W'
+		$standIn = 'ᴡ';
+		$lettersW = array( 'w', 'W' );
+
+		return str_replace( $lettersW, $standIn, $string );
+	}
+
+	/**
+	 * Turn the stand-in character back into a regular letter.
+	 *
+	 * @param string $string Text that may contain U+1D21
+	 * @return string The text with every U+1D21 replaced by 'W'
+	 */
+	private static function unmangle( $string ) {
+		// U+1D21 'LATIN LETTER SMALL CAPITAL W'
+		$standIn = 'ᴡ';
+
+		// The original casing was discarded by mangle(), so the result is always capital 'W'.
+		return str_replace( $standIn, 'W', $string );
+	}
+
+	/**
+	 * Build the sort key from the mangled form of the text, so that 'W' is not merged with 'V'.
+	 *
+	 * @param string $string Text to generate a sort key for
+	 * @return mixed Whatever the parent implementation returns for the mangled text
+	 */
+	function getSortKey( $string ) {
+		$mangled = self::mangle( $string );
+
+		return parent::getSortKey( $mangled );
+	}
+
+	/**
+	 * Look up the first letter using the mangled text, then undo the substitution so that the
+	 * stand-in character is reported as 'W'.
+	 *
+	 * @param string $string Text to find the first letter of
+	 * @return string The parent's first letter, with U+1D21 mapped back to 'W'
+	 */
+	function getFirstLetter( $string ) {
+		$mangled = self::mangle( $string );
+		$letter = parent::getFirstLetter( $mangled );
+
+		return self::unmangle( $letter );
+	}
+}