IcuCollation: Use codepoint as tiebreaker when getting first-letters
author Bartosz Dziewoński <matma.rex@gmail.com>
Tue, 8 May 2018 11:43:10 +0000 (13:43 +0200)
committer Martineznovo <martineznovo@gmail.com>
Fri, 11 May 2018 14:58:43 +0000 (14:58 +0000)
This prevents unexpected cuneiform digits from acting as headings for
2 and 3 on category pages.

Bug: T187645
Change-Id: I0424a24769899cb23b28704f97e1002fa44999fd
(cherry picked from commit 390ff7fca179e26ac177810145d27d98fe2fff43)

includes/collation/IcuCollation.php

index 36efdb3..9ac81ae 100644 (file)
@@ -384,9 +384,17 @@ class IcuCollation extends Collation {
                foreach ( $letters as $letter ) {
                        $key = $this->getPrimarySortKey( $letter );
                        if ( isset( $letterMap[$key] ) ) {
-                               // Primary collision
-                               // Keep whichever one sorts first in the main collator
-                               if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) {
+                               // Primary collision (two characters with the same sort position).
+                               // Keep whichever one sorts first in the main collator.
+                               $comp = $this->mainCollator->compare( $letter, $letterMap[$key] );
+                               wfDebug( "Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)\n" );
+                               // If that also has a collision, use codepoint as a tiebreaker.
+                               if ( $comp === 0 ) {
+                                       // TODO Use <=> operator when PHP 7 is allowed.
+                                       $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) -
+                                               UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] );
+                               }
+                               if ( $comp < 0 ) {
                                        $letterMap[$key] = $letter;
                                }
                        } else {