return new IcuCollation( 'root' );
case 'xx-uca-ckb':
return new CollationCkb;
+ case 'xx-uca-et':
+ return new CollationEt;
default:
$match = array();
if ( preg_match( '/^uca-([a-z@=-]+)$/', $collationName, $match ) ) {
# Provide a mechanism for extensions to hook in.
$collationObject = null;
- wfRunHooks( 'Collation::factory', array( $collationName, &$collationObject ) );
+ Hooks::run( 'Collation::factory', array( $collationName, &$collationObject ) );
if ( $collationObject instanceof Collation ) {
return $collationObject;
/** @var Collator */
private $mainCollator;
- /** @var */
+ /** @var string */
private $locale;
/** @var Language */
'el' => array(),
'eo' => array( "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ),
'es' => array( "Ñ" ),
- 'et' => array( "Š", "Ž", "Õ", "Ä", "Ö", "Ü" ),
+ 'et' => array( "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ), // added W for CollationEt (xx-uca-et)
'eu' => array( "Ñ" ),
'fo' => array( "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ),
'fur' => array( "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ),
// intl extension produces non null-terminated
// strings. Appending '' fixes it so that it doesn't generate
// a warning on each access in debug php.
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$key = $this->mainCollator->getSortKey( $string ) . '';
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
return $key;
}
/**
 * Build the sort key for a string using the primary collator.
 *
 * @param string $string Text to compute the key for
 * @return string Value of primaryCollator->getSortKey() with '' appended
 */
function getPrimarySortKey( $string ) {
// Warnings are silenced only for the duration of the collator call below.
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
// NOTE(review): the '' suffix presumably serves the same purpose as on the
// mainCollator call earlier in this file (intl returning non null-terminated
// strings) — confirm.
$key = $this->primaryCollator->getSortKey( $string ) . '';
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
return $key;
}
// Check for CJK
$firstChar = mb_substr( $string, 0, 1, 'UTF-8' );
- if ( ord( $firstChar ) > 0x7f && self::isCjk( utf8ToCodepoint( $firstChar ) ) ) {
+ if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
return $firstChar;
}
}
$cache = wfGetCache( CACHE_ANYTHING );
- $cacheKey = wfMemcKey( 'first-letters', $this->locale, $this->digitTransformLanguage->getCode() );
+ $cacheKey = wfMemcKey(
+ 'first-letters',
+ $this->locale,
+ $this->digitTransformLanguage->getCode(),
+ self::getICUVersion()
+ );
$cacheEntry = $cache->get( $cacheKey );
if ( $cacheEntry && isset( $cacheEntry['version'] )
$this->digitTransformLanguage = Language::factory( 'ckb' );
}
}
+
+/**
+ * Estonian ('et') collation that keeps 'W' apart from 'V' (bug 54168).
+ *
+ * ICU's Estonian rules collate 'W' and 'V' as the same letter, which is incorrect for modern
+ * Estonian. To sidestep that, every 'w' and 'W' is swapped for 'ᴡ' (U+1D21 'LATIN LETTER SMALL
+ * CAPITAL W') before the text is handed to the parent collation: that character is collated
+ * like 'W' but is not tailored to share a primary weight with 'V'. First letters returned by
+ * the parent have the substitution reversed before they are handed back.
+ */
+class CollationEt extends IcuCollation {
+	function __construct() {
+		parent::__construct( 'et' );
+	}
+
+	/**
+	 * Substitute U+1D21 for every 'w' and 'W'.
+	 *
+	 * @param string $string
+	 * @return string
+	 */
+	private static function mangle( $string ) {
+		// Both search entries map onto the single replacement
+		// U+1D21 'LATIN LETTER SMALL CAPITAL W'.
+		$mangled = str_replace( array( 'w', 'W' ), 'ᴡ', $string );
+
+		return $mangled;
+	}
+
+	/**
+	 * Turn every U+1D21 back into an uppercase 'W'.
+	 *
+	 * @param string $string
+	 * @return string
+	 */
+	private static function unmangle( $string ) {
+		// The original casing cannot be recovered, so the result is always 'W'.
+		$restored = str_replace( 'ᴡ', 'W', $string );
+
+		return $restored;
+	}
+
+	/**
+	 * Sort key of the mangled form of $string, as computed by the parent class.
+	 *
+	 * @param string $string
+	 * @return string
+	 */
+	function getSortKey( $string ) {
+		$mangled = self::mangle( $string );
+
+		return parent::getSortKey( $mangled );
+	}
+
+	/**
+	 * First letter of $string: the parent's answer for the mangled text, unmangled again.
+	 *
+	 * @param string $string
+	 * @return string
+	 */
+	function getFirstLetter( $string ) {
+		$letter = parent::getFirstLetter( self::mangle( $string ) );
+
+		return self::unmangle( $letter );
+	}
+}