From afc6e7cd157f33ec592ae7292d20569707af4397 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Bartosz=20Dziewo=C5=84ski?=
Date: Wed, 28 Dec 2016 16:10:24 +0100
Subject: [PATCH] CollationFa: Third time's the charm
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

We have to use a tertiary sortkey for everything with the primary
sortkey of 2627. Otherwise, the "Remove duplicate prefixes" logic in
IcuCollation would remove them.

The following characters will now be considered separate letters in
the 'xx-uca-fa' collation for the purpose of displaying the headings
on category pages: ء ئ ا و ٲ ٳ

Bug: T139110
Change-Id: Ibbea5d76348e4cdc38b74cba44286910b2ed592f
---
 includes/collation/CollationFa.php | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/includes/collation/CollationFa.php b/includes/collation/CollationFa.php
index b7e45cc1d6..9cce087d3a 100644
--- a/includes/collation/CollationFa.php
+++ b/includes/collation/CollationFa.php
@@ -19,9 +19,12 @@
  */
 
 /**
- * Temporary workaround for incorrect collation of Persian language ('fa') in ICU (bug T139110).
+ * Temporary workaround for incorrect collation of Persian language ('fa') in ICU 52 (bug T139110).
  *
- * 'ا' and 'و' should not be considered the same letter for the purposes of collation in Persian.
+ * All of the following will be considered separate letters for category headings in Persian:
+ * - Characters 'و' 'ا' (often appear at the beginning of words)
+ * - Characters 'ٲ' 'ٳ' (may appear at the beginning of words in loanwords)
+ * - Characters 'ء' 'ئ' (don't appear at the beginning of words, but it's easier to implement)
  *
  * @since 1.29
  */
@@ -34,11 +37,14 @@ class CollationFa extends IcuCollation {
 	}
 
 	public function getPrimarySortKey( $string ) {
-		$firstLetter = mb_substr( $string, 0, 1 );
-		if ( $firstLetter === 'و' || $firstLetter === 'ا' ) {
+		$primary = parent::getPrimarySortKey( $string );
+		// We have to use a tertiary sortkey for everything with the primary sortkey of 2627.
+		// Otherwise, the "Remove duplicate prefixes" logic in IcuCollation would remove them.
+		// This matches sortkeys for the following characters: ء ئ ا و ٲ ٳ
+		if ( substr( $primary, 0, 2 ) === "\x26\x27" ) {
+			wfDebug( "Using tertiary sortkey for '$string'\n" );
 			return $this->tertiaryCollator->getSortKey( $string );
 		}
-
-		return parent::getPrimarySortKey( $string );
+		return $primary;
 	}
 }
-- 
2.20.1