*/
/**
- * Temporary workaround for incorrect collation of Persian language ('fa') in ICU (bug T139110).
+ * Temporary workaround for incorrect collation of Persian language ('fa') in ICU 52 (bug T139110).
*
- * 'ا' and 'و' should not be considered the same letter for the purposes of collation in Persian.
+ * For sorting only, replace the affected letters with other characters that collate at roughly the right position in the alphabet:
+ *
+ * - Characters 'و' 'ا' (often appear at the beginning of words)
+ * - Characters 'ٲ' 'ٳ' (may appear at the beginning of words in loanwords)
*
* @since 1.29
*/
class CollationFa extends IcuCollation {
- private $tertiaryCollator;
+
+ // Workaround table: UTF-8 bytes of an affected letter => stand-in character substituted before the sort key is built. NOTE(review): the "\u{...}" escapes are only interpreted by PHP >= 7.0 - confirm the minimum supported version.
+ private $override = [
+ "\xd8\xa7" => "\u{0621}", // U+0627 ARABIC LETTER ALEF => U+0621 ARABIC LETTER HAMZA
+ "\xd9\x88" => "\u{0649}", // U+0648 ARABIC LETTER WAW => U+0649 ARABIC LETTER ALEF MAKSURA
+ "\xd9\xb2" => "\xF3\xB3\x80\x81", // U+0672 ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE => U+F3001 (supplementary private use area)
+ "\xd9\xb3" => "\xF3\xB3\x80\x82", // U+0673 ARABIC LETTER ALEF WITH WAVY HAMZA BELOW => U+F3002 (supplementary private use area)
+ ];
public function __construct() {
parent::__construct( 'fa' );
- $this->tertiaryCollator = Collator::create( 'fa' );
}
- public function getPrimarySortKey( $string ) {
- $firstLetter = mb_substr( $string, 0, 1 );
- if ( $firstLetter === 'و' || $firstLetter === 'ا' ) {
- return $this->tertiaryCollator->getSortKey( $string );
- }
+ public function getSortKey( $string ) {
+ $modified = strtr( $string, $this->override ); // Swap every occurrence of an overridden letter (not just the first) for its stand-in.
+ return parent::getSortKey( $modified ); // The parent collator only ever sees the substituted text.
+ }
- return parent::getPrimarySortKey( $string );
+ public function getFirstLetter( $string ) {
+ if ( isset( $this->override[substr( $string, 0, 2 )] ) ) { // Every key in $override is exactly 2 bytes, so a byte-wise 2-byte prefix is enough to detect one.
+ return substr( $string, 0, 2 ); // Report the original letter itself, not its stand-in.
+ }
+ return parent::getFirstLetter( $string );
}
}