/** @var Language */
protected $digitTransformLanguage;
+ /** @var bool Whether numeric collation is on (set when the locale ends in '-u-kn') */
+ private $useNumericCollation = false;
+
/** @var array */
private $firstLetterData;
* available and that there are, in fact, no additional letters to consider.
*/
private static $tailoringFirstLetters = [
- // Verified by native speakers
- 'be' => [ "Ё" ],
- 'be-tarask' => [ "Ё" ],
- 'cy' => [ "Ch", "Dd", "Ff", "Ng", "Ll", "Ph", "Rh", "Th" ],
- 'en' => [],
- // RTL, let's put each letter on a new line
- 'fa' => [
- "آ",
- "ء",
- "ه",
- "ا",
- "و"
- ],
- 'fi' => [ "Å", "Ä", "Ö" ],
- 'fr' => [],
- 'hu' => [ "Cs", "Dz", "Dzs", "Gy", "Ly", "Ny", "Ö", "Sz", "Ty", "Ü", "Zs" ],
- 'is' => [ "Á", "Ð", "É", "Í", "Ó", "Ú", "Ý", "Þ", "Æ", "Ö", "Å" ],
- 'it' => [],
- 'lv' => [ "Č", "Ģ", "Ķ", "Ļ", "Ņ", "Š", "Ž" ],
- 'pl' => [ "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ],
- 'pt' => [],
- 'ru' => [],
- 'sv' => [ "Å", "Ä", "Ö" ],
- 'sv@collation=standard' => [ "Å", "Ä", "Ö" ],
- 'uk' => [ "Ґ", "Ь" ],
- 'vi' => [ "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ],
- // Not verified, but likely correct
'af' => [],
- 'ast' => [ "Ch", "Ll", "Ñ" ],
+ 'am' => [],
+ 'ar' => [],
+ 'as' => [ "\xe0\xa6\x82", "\xe0\xa6\x81", "\xe0\xa6\x83", "\xe0\xa7\x8e", "ক্ষ" ],
+ 'ast' => [ "Ch", "Ll", "Ñ" ], // not in libicu
'az' => [ "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ],
+ 'be' => [ "Ё" ],
+ 'be-tarask' => [ "Ё" ],
'bg' => [],
+ 'bo' => [],
'br' => [ "Ch", "C'h" ],
'bs' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
+ 'bs-Cyrl' => [],
'ca' => [],
- 'co' => [],
+ 'chr' => [],
+ 'co' => [], // not in libicu
'cs' => [ "Č", "Ch", "Ř", "Š", "Ž" ],
+ 'cy' => [ "Ch", "Dd", "Ff", "Ng", "Ll", "Ph", "Rh", "Th" ],
'da' => [ "Æ", "Ø", "Å" ],
'de' => [],
+ 'de-AT@collation=phonebook' => [ 'ä', 'ö', 'ü', 'ß' ],
'dsb' => [ "Č", "Ć", "Dź", "Ě", "Ch", "Ł", "Ń", "Ŕ", "Š", "Ś", "Ž", "Ź" ],
+ 'ee' => [ "Dz", "Ɖ", "Ɛ", "Ƒ", "Gb", "Ɣ", "Kp", "Ny", "Ŋ", "Ɔ", "Ts", "Ʋ" ],
'el' => [],
+ 'en' => [],
'eo' => [ "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ],
'es' => [ "Ñ" ],
'et' => [ "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ], // added W for CollationEt (xx-uca-et)
- 'eu' => [ "Ñ" ],
+ 'eu' => [ "Ñ" ], // not in libicu
+ 'fa' => [
+ // RTL, let's put each letter on a new line
+ "آ",
+ "ء",
+ "ه",
+ "ا",
+ "و"
+ ],
+ 'fi' => [ "Å", "Ä", "Ö" ],
+ 'fil' => [ "Ñ", "Ng" ],
'fo' => [ "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ],
- 'fur' => [ "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ],
- 'fy' => [],
+ 'fr' => [],
+ 'fr-CA' => [], // fr-CA sorts accents slightly differently from fr.
+ 'fur' => [ "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ], // not in libicu
+ 'fy' => [], // not in libicu
'ga' => [],
- 'gd' => [],
+ 'gd' => [], // not in libicu
'gl' => [ "Ch", "Ll", "Ñ" ],
+ 'gu' => [ "\xe0\xaa\x82", "\xe0\xaa\x83", "\xe0\xaa\x81", "\xe0\xaa\xb3" ],
+ 'ha' => [ 'Ɓ', 'Ɗ', 'Ƙ', 'Sh', 'Ts', 'Ƴ' ],
+ 'haw' => [ 'ʻ' ],
+ 'he' => [],
+ 'hi' => [ "\xe0\xa4\x82", "\xe0\xa4\x83" ],
'hr' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
'hsb' => [ "Č", "Dź", "Ě", "Ch", "Ł", "Ń", "Ř", "Š", "Ć", "Ž" ],
+ 'hu' => [ "Cs", "Dz", "Dzs", "Gy", "Ly", "Ny", "Ö", "Sz", "Ty", "Ü", "Zs" ],
+ 'hy' => [ "և" ],
+ 'id' => [],
+ 'ig' => [ "Ch", "Gb", "Gh", "Gw", "Ị", "Kp", "Kw", "Ṅ", "Nw", "Ny", "Ọ", "Sh", "Ụ" ],
+ 'is' => [ "Á", "Ð", "É", "Í", "Ó", "Ú", "Ý", "Þ", "Æ", "Ö", "Å" ],
+ 'it' => [],
+ 'ka' => [],
'kk' => [ "Ү", "І" ],
'kl' => [ "Æ", "Ø", "Å" ],
- 'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ],
+ 'km' => [
+ "រ", "ឫ", "ឬ", "ល", "ឭ", "ឮ", "\xe1\x9e\xbb\xe1\x9f\x86",
+ "\xe1\x9f\x86", "\xe1\x9e\xb6\xe1\x9f\x86", "\xe1\x9f\x87",
+ "\xe1\x9e\xb7\xe1\x9f\x87", "\xe1\x9e\xbb\xe1\x9f\x87",
+ "\xe1\x9f\x81\xe1\x9f\x87", "\xe1\x9f\x84\xe1\x9f\x87",
+ ],
+ 'kn' => [ "\xe0\xb2\x81", "\xe0\xb2\x83", "\xe0\xb3\xb1", "\xe0\xb3\xb2" ],
+ 'kok' => [ "\xe0\xa4\x82", "\xe0\xa4\x83", "ळ", "क्ष" ],
+ 'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ], // not in libicu
'ky' => [ "Ё" ],
- 'la' => [],
+ 'la' => [], // not in libicu
'lb' => [],
+ 'lkt' => [ 'Č', 'Ǧ', 'Ȟ', 'Š', 'Ž' ],
+ 'ln' => [ 'Ɛ' ],
+ 'lo' => [],
'lt' => [ "Č", "Š", "Ž" ],
- 'mk' => [],
- 'mo' => [ "Ă", "Â", "Î", "Ş", "Ţ" ],
+ 'lv' => [ "Č", "Ģ", "Ķ", "Ļ", "Ņ", "Š", "Ž" ],
+ 'mk' => [ "Ѓ", "Ќ" ],
+ 'ml' => [],
+ 'mn' => [],
+ 'mo' => [ "Ă", "Â", "Î", "Ş", "Ţ" ], // not in libicu
+ 'mr' => [ "\xe0\xa4\x82", "\xe0\xa4\x83", "ळ", "क्ष", "ज्ञ" ],
+ 'ms' => [],
'mt' => [ "Ċ", "Ġ", "Għ", "Ħ", "Ż" ],
+ 'nb' => [ "Æ", "Ø", "Å" ],
+ 'ne' => [],
'nl' => [],
- 'no' => [ "Æ", "Ø", "Å" ],
- 'oc' => [],
- 'rm' => [],
+ 'nn' => [ "Æ", "Ø", "Å" ],
+ 'no' => [ "Æ", "Ø", "Å" ], // not in libicu. You should probably use nb or nn instead.
+ 'oc' => [], // not in libicu
+ 'om' => [ 'Ch', 'Dh', 'Kh', 'Ny', 'Ph', 'Sh' ],
+ 'or' => [ "\xe0\xac\x81", "\xe0\xac\x82", "\xe0\xac\x83", "କ୍ଷ" ],
+ 'pa' => [ "\xe0\xa9\x8d" ],
+ 'pl' => [ "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ],
+ 'pt' => [],
+ 'rm' => [], // not in libicu
'ro' => [ "Ă", "Â", "Î", "Ş", "Ţ" ],
- 'rup' => [ "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ],
+ 'ru' => [],
+ 'rup' => [ "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ], // not in libicu
'sco' => [],
+ 'se' => [
+ 'Á', 'Č', 'Ʒ', 'Ǯ', 'Đ', 'Ǧ', 'Ǥ', 'Ǩ', 'Ŋ',
+ 'Š', 'Ŧ', 'Ž', 'Ø', 'Æ', 'Ȧ', 'Ä', 'Ö'
+ ],
+ 'si' => [ "\xe0\xb6\x82", "\xe0\xb6\x83", "\xe0\xb6\xa4" ],
'sk' => [ "Ä", "Č", "Ch", "Ô", "Š", "Ž" ],
'sl' => [ "Č", "Š", "Ž" ],
'smn' => [ "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ],
'sq' => [ "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ],
'sr' => [],
+ 'sr-Latn' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
+ 'sv' => [ "Å", "Ä", "Ö" ],
+ 'sv@collation=standard' => [ "Å", "Ä", "Ö" ],
+ 'sw' => [],
'ta' => [
"\xE0\xAE\x82", "ஃ", "க்ஷ", "க்", "ங்", "ச்", "ஞ்", "ட்", "ண்", "த்", "ந்",
"ப்", "ம்", "ய்", "ர்", "ல்", "வ்", "ழ்", "ள்", "ற்", "ன்", "ஜ்", "ஶ்", "ஷ்",
"ஸ்", "ஹ்", "க்ஷ்"
],
+ 'te' => [ "\xe0\xb0\x81", "\xe0\xb0\x82", "\xe0\xb0\x83" ],
+ 'th' => [ "ฯ", "\xe0\xb9\x86", "\xe0\xb9\x8d", "\xe0\xb8\xba" ],
'tk' => [ "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ],
- 'tl' => [ "Ñ", "Ng" ],
+ 'tl' => [ "Ñ", "Ng" ], // not in libicu
+ 'to' => [ "Ng", "ʻ" ],
'tr' => [ "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ],
- 'tt' => [ "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ],
- 'uz' => [ "Ch", "G'", "Ng", "O'", "Sh" ],
+ 'tt' => [ "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ], // not in libicu
+ 'uk' => [ "Ґ", "Ь" ],
+ 'uz' => [ "Ch", "G'", "Ng", "O'", "Sh" ], // not in libicu
+ 'vi' => [ "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ],
+ 'vo' => [ "Ä", "Ö", "Ü" ],
+ 'yi' => [
+ "\xd7\x91\xd6\xbf", "\xd7\x9b\xd6\xbc", "\xd7\xa4\xd6\xbc",
+ "\xd7\xa9\xd7\x82", "\xd7\xaa\xd6\xbc"
+ ],
+ 'yo' => [ "Ẹ", "Gb", "Ọ", "Ṣ" ],
+ 'zu' => [],
];
/**
$this->primaryCollator = Collator::create( $locale );
$this->primaryCollator->setStrength( Collator::PRIMARY );
+
+ // If the special suffix for numeric collation is present, turn on numeric collation.
+ if ( substr( $locale, -5, 5 ) === '-u-kn' ) {
+ $this->useNumericCollation = true;
+ // Strip off the special suffix so it doesn't trip up fetchFirstLetterData().
+ $this->locale = substr( $this->locale, 0, -5 );
+ $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
+ $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
+ }
}
public function getSortKey( $string ) {
return '';
}
- // Check for CJK
$firstChar = mb_substr( $string, 0, 1, 'UTF-8' );
+
+ // If the first character is a CJK character, just return that character.
if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
return $firstChar;
}
// Before the first letter
return '';
}
- return $this->getLetterByIndex( $min );
+
+ $sortLetter = $this->getLetterByIndex( $min );
+
+ if ( $this->useNumericCollation ) {
+ // If the sort letter is a number, return '0–9' (or localized equivalent).
+ // ASCII value of 0 is 48. ASCII value of 9 is 57.
+ // Note that this also applies to non-Arabic numerals since they are
+ // mapped to Arabic numeral sort letters. For example, ২ sorts as 2.
+ if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
+ $sortLetter = wfMessage( 'category-header-numerals' )->numParams( 0, 9 )->text();
+ }
+ }
+ return $sortLetter;
}
/**
}
/**
+ * Test if a code point is a CJK (Chinese, Japanese, Korean) character
* @since 1.16.3
*/
public static function isCjk( $codepoint ) {
$versionPrefix = substr( $icuVersion, 0, 3 );
// Source: http://site.icu-project.org/download
$map = [
+ '57.' => '8.0',
+ '56.' => '8.0',
+ '55.' => '7.0',
+ '54.' => '7.0',
+ '53.' => '6.3',
+ '52.' => '6.3',
+ '51.' => '6.2',
'50.' => '6.2',
'49.' => '6.1',
'4.8' => '6.0',