From: jenkins-bot Date: Wed, 27 Jul 2016 14:54:48 +0000 (+0000) Subject: Merge "Adding support for numeric collation when using UCA collations" X-Git-Tag: 1.31.0-rc.0~6244 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=420c52003bc3c319b85d3622386d1ec1bd69133d;hp=d1ac58cd85c46b5c9265ba479a45fd348c554064 Merge "Adding support for numeric collation when using UCA collations" --- diff --git a/includes/collation/Collation.php b/includes/collation/Collation.php index 9fb06604f8..fe254afdc5 100644 --- a/includes/collation/Collation.php +++ b/includes/collation/Collation.php @@ -53,6 +53,8 @@ abstract class Collation { return new IdentityCollation; case 'uca-default': return new IcuCollation( 'root' ); + case 'uca-default-u-kn': + return new IcuCollation( 'root-u-kn' ); case 'xx-uca-ckb': return new CollationCkb; case 'xx-uca-et': diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index c2e8b2412d..f5d3c3357d 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -36,6 +36,9 @@ class IcuCollation extends Collation { /** @var Language */ protected $digitTransformLanguage; + /** @var boolean */ + private $useNumericCollation = false; + /** @var array */ private $firstLetterData; @@ -197,6 +200,15 @@ class IcuCollation extends Collation { $this->primaryCollator = Collator::create( $locale ); $this->primaryCollator->setStrength( Collator::PRIMARY ); + + // If the special suffix for numeric collation is present, turn on numeric collation. + if ( substr( $locale, -5, 5 ) === '-u-kn' ) { + $this->useNumericCollation = true; + // Strip off the special suffix so it doesn't trip up fetchFirstLetterData(). 
+ $this->locale = substr( $this->locale, 0, -5 ); + $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON ); + $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON ); + } } public function getSortKey( $string ) { @@ -213,8 +225,9 @@ class IcuCollation extends Collation { return ''; } - // Check for CJK $firstChar = mb_substr( $string, 0, 1, 'UTF-8' ); + + // If the first character is a CJK character, just return that character. if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) { return $firstChar; } @@ -232,7 +245,19 @@ class IcuCollation extends Collation { // Before the first letter return ''; } - return $this->getLetterByIndex( $min ); + + $sortLetter = $this->getLetterByIndex( $min ); + + if ( $this->useNumericCollation ) { + // If the sort letter is a number, return '0–9' (or localized equivalent). + // ASCII value of 0 is 48. ASCII value of 9 is 57. + // Note that this also applies to non-Arabic numerals since they are + // mapped to Arabic numeral sort letters. For example, ২ sorts as 2. 
+ if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) { + $sortLetter = wfMessage( 'category-header-numerals' )->numParams( 0, 9 )->text(); + } + } + return $sortLetter; } /** @@ -408,6 +433,7 @@ class IcuCollation extends Collation { } /** + * Test if a code point is a CJK (Chinese, Japanese, Korean) character * @since 1.16.3 */ public static function isCjk( $codepoint ) { diff --git a/languages/i18n/en.json b/languages/i18n/en.json index 6d1e58d4c5..7c755f7db9 100644 --- a/languages/i18n/en.json +++ b/languages/i18n/en.json @@ -137,6 +137,7 @@ "noindex-category": "Noindexed pages", "broken-file-category": "Pages with broken file links", "categoryviewer-pagedlinks": "($1) ($2)", + "category-header-numerals": "$1–$2", "about": "About", "article": "Content page", "newwindow": "(opens in new window)", diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 3cd7ac6da0..2182f79de7 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -318,6 +318,7 @@ "noindex-category": "Name of the [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages with the __NOINDEX__ behavior switch are listed.\n\nFor description of this behavior switch see [[mw:Special:MyLanguage/Help:Magic_words#Behavior_switches|MediaWiki]].\n\nSee also:\n* {{msg-mw|Noindex-category-desc}}", "broken-file-category": "Name of [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages that embed files that do not exist (\"broken images\") are listed.\n\nSee also:\n* {{msg-mw|Broken-file-category-desc}}", "categoryviewer-pagedlinks": "{{Optional}}\nThe pagination links in category viewer. Parameters:\n* $1 - the previous link, uses {{msg-mw|Prevn}}\n* $2 - the next link, uses {{msg-mw|Nextn}}", + "category-header-numerals": "{{Optional}}\nA header for all pages whose titles start with a number. This is used on category pages. 
This should only be translated if your language uses a different method to indicate a range of numbers (other than a dash).\n* $1 - 0 (or localized equivalent)\n* $2 - 9 (or localized equivalent)", "about": "{{Identical|About}}", "article": "A 'content page' is a page that forms part of the purpose of the wiki. It includes the main page and pages in the main namespace and any other namespaces that are included when the wiki is customised. For example on Wikimedia Commons 'content pages' include pages in the file and category namespaces. On Wikinews 'content pages' include pages in the Portal namespace. For technical definition of 'content namespaces' see [[mw:Manual:Using_custom_namespaces#Content_namespaces|MediaWiki]].\n\nPossible alternatives to the word 'content' are 'subject matter' or 'wiki subject' or 'wiki purpose'.\n\n{{Identical|Content page}}", "newwindow": "Below the edit form, next to \"{{msg-mw|Edithelp}}\".",