Merge "Remove use of deprecated User::EDIT_TOKEN_SUFFIX"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Sun, 3 Apr 2016 10:45:16 +0000 (10:45 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Sun, 3 Apr 2016 10:45:16 +0000 (10:45 +0000)
20 files changed:
autoload.php
includes/Collation.php [deleted file]
includes/SiteStats.php
includes/StubObject.php
includes/cache/localisation/LCStoreCDB.php
includes/collation/Collation.php [new file with mode: 0644]
includes/collation/CollationCkb.php [new file with mode: 0644]
includes/collation/CollationEt.php [new file with mode: 0644]
includes/collation/IcuCollation.php [new file with mode: 0644]
includes/collation/IdentityCollation.php [new file with mode: 0644]
includes/collation/UppercaseCollation.php [new file with mode: 0644]
includes/libs/eventrelayer/EventRelayer.php
includes/libs/eventrelayer/EventRelayerNull.php [new file with mode: 0644]
includes/media/Bitmap.php
includes/media/WebP.php
includes/specialpage/SpecialPage.php
includes/specials/SpecialBotPasswords.php
includes/specials/SpecialStatistics.php
maintenance/mctest.php
tests/phpunit/includes/debug/logger/monolog/AvroFormatterTest.php

index fd4f873..df87d98 100644 (file)
@@ -240,9 +240,9 @@ $wgAutoloadLocalClasses = [
        'CliInstaller' => __DIR__ . '/includes/installer/CliInstaller.php',
        'CloneDatabase' => __DIR__ . '/includes/db/CloneDatabase.php',
        'CodeContentHandler' => __DIR__ . '/includes/content/CodeContentHandler.php',
-       'Collation' => __DIR__ . '/includes/Collation.php',
-       'CollationCkb' => __DIR__ . '/includes/Collation.php',
-       'CollationEt' => __DIR__ . '/includes/Collation.php',
+       'Collation' => __DIR__ . '/includes/collation/Collation.php',
+       'CollationCkb' => __DIR__ . '/includes/collation/CollationCkb.php',
+       'CollationEt' => __DIR__ . '/includes/collation/CollationEt.php',
        'CommandLineInc' => __DIR__ . '/maintenance/commandLine.inc',
        'CommandLineInstaller' => __DIR__ . '/maintenance/install.php',
        'CompareParserCache' => __DIR__ . '/maintenance/compareParserCache.php',
@@ -398,7 +398,7 @@ $wgAutoloadLocalClasses = [
        'EventRelayer' => __DIR__ . '/includes/libs/eventrelayer/EventRelayer.php',
        'EventRelayerGroup' => __DIR__ . '/includes/EventRelayerGroup.php',
        'EventRelayerMCRD' => __DIR__ . '/includes/libs/eventrelayer/EventRelayerMCRD.php',
-       'EventRelayerNull' => __DIR__ . '/includes/libs/eventrelayer/EventRelayer.php',
+       'EventRelayerNull' => __DIR__ . '/includes/libs/eventrelayer/EventRelayerNull.php',
        'Exif' => __DIR__ . '/includes/media/Exif.php',
        'ExifBitmapHandler' => __DIR__ . '/includes/media/ExifBitmap.php',
        'ExplodeIterator' => __DIR__ . '/includes/libs/ExplodeIterator.php',
@@ -564,8 +564,8 @@ $wgAutoloadLocalClasses = [
        'IPSet' => __DIR__ . '/includes/compat/IPSetCompat.php',
        'IPTC' => __DIR__ . '/includes/media/IPTC.php',
        'IRCColourfulRCFeedFormatter' => __DIR__ . '/includes/rcfeed/IRCColourfulRCFeedFormatter.php',
-       'IcuCollation' => __DIR__ . '/includes/Collation.php',
-       'IdentityCollation' => __DIR__ . '/includes/Collation.php',
+       'IcuCollation' => __DIR__ . '/includes/collation/IcuCollation.php',
+       'IdentityCollation' => __DIR__ . '/includes/collation/IdentityCollation.php',
        'ImageBuilder' => __DIR__ . '/maintenance/rebuildImages.php',
        'ImageCleanup' => __DIR__ . '/maintenance/cleanupImages.php',
        'ImageGallery' => __DIR__ . '/includes/gallery/TraditionalImageGallery.php',
@@ -1375,7 +1375,7 @@ $wgAutoloadLocalClasses = [
        'UploadStashNotLoggedInException' => __DIR__ . '/includes/upload/UploadStash.php',
        'UploadStashWrongOwnerException' => __DIR__ . '/includes/upload/UploadStash.php',
        'UploadStashZeroLengthFileException' => __DIR__ . '/includes/upload/UploadStash.php',
-       'UppercaseCollation' => __DIR__ . '/includes/Collation.php',
+       'UppercaseCollation' => __DIR__ . '/includes/collation/UppercaseCollation.php',
        'UsageException' => __DIR__ . '/includes/api/ApiMain.php',
        'User' => __DIR__ . '/includes/user/User.php',
        'UserArray' => __DIR__ . '/includes/user/UserArray.php',
diff --git a/includes/Collation.php b/includes/Collation.php
deleted file mode 100644 (file)
index 7a3623d..0000000
+++ /dev/null
@@ -1,648 +0,0 @@
-<?php
-/**
- * Database row sorting.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-abstract class Collation {
-       private static $instance;
-
-       /**
-        * @return Collation
-        */
-       static function singleton() {
-               if ( !self::$instance ) {
-                       global $wgCategoryCollation;
-                       self::$instance = self::factory( $wgCategoryCollation );
-               }
-               return self::$instance;
-       }
-
-       /**
-        * @throws MWException
-        * @param string $collationName
-        * @return Collation
-        */
-       static function factory( $collationName ) {
-               switch ( $collationName ) {
-                       case 'uppercase':
-                               return new UppercaseCollation;
-                       case 'identity':
-                               return new IdentityCollation;
-                       case 'uca-default':
-                               return new IcuCollation( 'root' );
-                       case 'xx-uca-ckb':
-                               return new CollationCkb;
-                       case 'xx-uca-et':
-                               return new CollationEt;
-                       default:
-                               $match = [];
-                               if ( preg_match( '/^uca-([a-z@=-]+)$/', $collationName, $match ) ) {
-                                       return new IcuCollation( $match[1] );
-                               }
-
-                               # Provide a mechanism for extensions to hook in.
-                               $collationObject = null;
-                               Hooks::run( 'Collation::factory', [ $collationName, &$collationObject ] );
-
-                               if ( $collationObject instanceof Collation ) {
-                                       return $collationObject;
-                               }
-
-                               // If all else fails...
-                               throw new MWException( __METHOD__ . ": unknown collation type \"$collationName\"" );
-               }
-       }
-
-       /**
-        * Given a string, convert it to a (hopefully short) key that can be used
-        * for efficient sorting.  A binary sort according to the sortkeys
-        * corresponds to a logical sort of the corresponding strings.  Current
-        * code expects that a line feed character should sort before all others, but
-        * has no other particular expectations (and that one can be changed if
-        * necessary).
-        *
-        * @param string $string UTF-8 string
-        * @return string Binary sortkey
-        */
-       abstract function getSortKey( $string );
-
-       /**
-        * Given a string, return the logical "first letter" to be used for
-        * grouping on category pages and so on.  This has to be coordinated
-        * carefully with convertToSortkey(), or else the sorted list might jump
-        * back and forth between the same "initial letters" or other pathological
-        * behavior.  For instance, if you just return the first character, but "a"
-        * sorts the same as "A" based on getSortKey(), then you might get a
-        * list like
-        *
-        * == A ==
-        * * [[Aardvark]]
-        *
-        * == a ==
-        * * [[antelope]]
-        *
-        * == A ==
-        * * [[Ape]]
-        *
-        * etc., assuming for the sake of argument that $wgCapitalLinks is false.
-        *
-        * @param string $string UTF-8 string
-        * @return string UTF-8 string corresponding to the first letter of input
-        */
-       abstract function getFirstLetter( $string );
-}
-
-class UppercaseCollation extends Collation {
-       private $lang;
-
-       function __construct() {
-               // Get a language object so that we can use the generic UTF-8 uppercase
-               // function there
-               $this->lang = Language::factory( 'en' );
-       }
-
-       function getSortKey( $string ) {
-               return $this->lang->uc( $string );
-       }
-
-       function getFirstLetter( $string ) {
-               if ( $string[0] == "\0" ) {
-                       $string = substr( $string, 1 );
-               }
-               return $this->lang->ucfirst( $this->lang->firstChar( $string ) );
-       }
-}
-
-/**
- * Collation class that's essentially a no-op.
- *
- * Does sorting based on binary value of the string.
- * Like how things were pre 1.17.
- */
-class IdentityCollation extends Collation {
-
-       function getSortKey( $string ) {
-               return $string;
-       }
-
-       function getFirstLetter( $string ) {
-               global $wgContLang;
-               // Copied from UppercaseCollation.
-               // I'm kind of unclear on when this could happen...
-               if ( $string[0] == "\0" ) {
-                       $string = substr( $string, 1 );
-               }
-               return $wgContLang->firstChar( $string );
-       }
-}
-
-class IcuCollation extends Collation {
-       const FIRST_LETTER_VERSION = 2;
-
-       /** @var Collator */
-       private $primaryCollator;
-
-       /** @var Collator */
-       private $mainCollator;
-
-       /** @var string */
-       private $locale;
-
-       /** @var Language */
-       protected $digitTransformLanguage;
-
-       /** @var array */
-       private $firstLetterData;
-
-       /**
-        * Unified CJK blocks.
-        *
-        * The same definition of a CJK block must be used for both Collation and
-        * generateCollationData.php. These blocks are omitted from the first
-        * letter data, as an optimisation measure and because the default UCA table
-        * is pretty useless for sorting Chinese text anyway. Japanese and Korean
-        * blocks are not included here, because they are smaller and more useful.
-        */
-       private static $cjkBlocks = [
-               [ 0x2E80, 0x2EFF ], // CJK Radicals Supplement
-               [ 0x2F00, 0x2FDF ], // Kangxi Radicals
-               [ 0x2FF0, 0x2FFF ], // Ideographic Description Characters
-               [ 0x3000, 0x303F ], // CJK Symbols and Punctuation
-               [ 0x31C0, 0x31EF ], // CJK Strokes
-               [ 0x3200, 0x32FF ], // Enclosed CJK Letters and Months
-               [ 0x3300, 0x33FF ], // CJK Compatibility
-               [ 0x3400, 0x4DBF ], // CJK Unified Ideographs Extension A
-               [ 0x4E00, 0x9FFF ], // CJK Unified Ideographs
-               [ 0xF900, 0xFAFF ], // CJK Compatibility Ideographs
-               [ 0xFE30, 0xFE4F ], // CJK Compatibility Forms
-               [ 0x20000, 0x2A6DF ], // CJK Unified Ideographs Extension B
-               [ 0x2A700, 0x2B73F ], // CJK Unified Ideographs Extension C
-               [ 0x2B740, 0x2B81F ], // CJK Unified Ideographs Extension D
-               [ 0x2F800, 0x2FA1F ], // CJK Compatibility Ideographs Supplement
-       ];
-
-       /**
-        * Additional characters (or character groups) to be considered separate
-        * letters for given languages, or to be removed from the list of such
-        * letters (denoted by keys starting with '-').
-        *
-        * These are additions to (or subtractions from) the data stored in the
-        * first-letters-root.ser file (which among others includes full basic latin,
-        * cyrillic and greek alphabets).
-        *
-        * "Separate letter" is a letter that would have a separate heading/section
-        * for it in a dictionary or a phone book in this language. This data isn't
-        * used for sorting (the ICU library handles that), only for deciding which
-        * characters (or character groups) to use as headings.
-        *
-        * Initially generated based on the primary level of Unicode collation
-        * tailorings available at http://developer.mimer.com/charts/tailorings.htm ,
-        * later modified.
-        *
-        * Empty arrays are intended; this signifies that the data for the language is
-        * available and that there are, in fact, no additional letters to consider.
-        */
-       private static $tailoringFirstLetters = [
-               // Verified by native speakers
-               'be' => [ "Ё" ],
-               'be-tarask' => [ "Ё" ],
-               'cy' => [ "Ch", "Dd", "Ff", "Ng", "Ll", "Ph", "Rh", "Th" ],
-               'en' => [],
-               'fa' => [ "آ", "ء", "ه" ],
-               'fi' => [ "Å", "Ä", "Ö" ],
-               'fr' => [],
-               'hu' => [ "Cs", "Dz", "Dzs", "Gy", "Ly", "Ny", "Ö", "Sz", "Ty", "Ü", "Zs" ],
-               'is' => [ "Á", "Ð", "É", "Í", "Ó", "Ú", "Ý", "Þ", "Æ", "Ö", "Å" ],
-               'it' => [],
-               'lv' => [ "Č", "Ģ", "Ķ", "Ļ", "Ņ", "Š", "Ž" ],
-               'pl' => [ "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ],
-               'pt' => [],
-               'ru' => [],
-               'sv' => [ "Å", "Ä", "Ö" ],
-               'sv@collation=standard' => [ "Å", "Ä", "Ö" ],
-               'uk' => [ "Ґ", "Ь" ],
-               'vi' => [ "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ],
-               // Not verified, but likely correct
-               'af' => [],
-               'ast' => [ "Ch", "Ll", "Ñ" ],
-               'az' => [ "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ],
-               'bg' => [],
-               'br' => [ "Ch", "C'h" ],
-               'bs' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
-               'ca' => [],
-               'co' => [],
-               'cs' => [ "Č", "Ch", "Ř", "Š", "Ž" ],
-               'da' => [ "Æ", "Ø", "Å" ],
-               'de' => [],
-               'dsb' => [ "Č", "Ć", "Dź", "Ě", "Ch", "Ł", "Ń", "Ŕ", "Š", "Ś", "Ž", "Ź" ],
-               'el' => [],
-               'eo' => [ "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ],
-               'es' => [ "Ñ" ],
-               'et' => [ "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ], // added W for CollationEt (xx-uca-et)
-               'eu' => [ "Ñ" ],
-               'fo' => [ "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ],
-               'fur' => [ "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ],
-               'fy' => [],
-               'ga' => [],
-               'gd' => [],
-               'gl' => [ "Ch", "Ll", "Ñ" ],
-               'hr' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
-               'hsb' => [ "Č", "Dź", "Ě", "Ch", "Ł", "Ń", "Ř", "Š", "Ć", "Ž" ],
-               'kk' => [ "Ү", "І" ],
-               'kl' => [ "Æ", "Ø", "Å" ],
-               'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ],
-               'ky' => [ "Ё" ],
-               'la' => [],
-               'lb' => [],
-               'lt' => [ "Č", "Š", "Ž" ],
-               'mk' => [],
-               'mo' => [ "Ă", "Â", "Î", "Ş", "Ţ" ],
-               'mt' => [ "Ċ", "Ġ", "Għ", "Ħ", "Ż" ],
-               'nl' => [],
-               'no' => [ "Æ", "Ø", "Å" ],
-               'oc' => [],
-               'rm' => [],
-               'ro' => [ "Ă", "Â", "Î", "Ş", "Ţ" ],
-               'rup' => [ "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ],
-               'sco' => [],
-               'sk' => [ "Ä", "Č", "Ch", "Ô", "Š", "Ž" ],
-               'sl' => [ "Č", "Š", "Ž" ],
-               'smn' => [ "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ],
-               'sq' => [ "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ],
-               'sr' => [],
-               'tk' => [ "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ],
-               'tl' => [ "Ñ", "Ng" ],
-               'tr' => [ "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ],
-               'tt' => [ "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ],
-               'uz' => [ "Ch", "G'", "Ng", "O'", "Sh" ],
-       ];
-
-       const RECORD_LENGTH = 14;
-
-       function __construct( $locale ) {
-               if ( !extension_loaded( 'intl' ) ) {
-                       throw new MWException( 'An ICU collation was requested, ' .
-                               'but the intl extension is not available.' );
-               }
-
-               $this->locale = $locale;
-               // Drop everything after the '@' in locale's name
-               $localeParts = explode( '@', $locale );
-               $this->digitTransformLanguage = Language::factory( $locale === 'root' ? 'en' : $localeParts[0] );
-
-               $this->mainCollator = Collator::create( $locale );
-               if ( !$this->mainCollator ) {
-                       throw new MWException( "Invalid ICU locale specified for collation: $locale" );
-               }
-
-               $this->primaryCollator = Collator::create( $locale );
-               $this->primaryCollator->setStrength( Collator::PRIMARY );
-       }
-
-       function getSortKey( $string ) {
-               // intl extension produces non null-terminated
-               // strings. Appending '' fixes it so that it doesn't generate
-               // a warning on each access in debug php.
-               MediaWiki\suppressWarnings();
-               $key = $this->mainCollator->getSortKey( $string ) . '';
-               MediaWiki\restoreWarnings();
-               return $key;
-       }
-
-       function getPrimarySortKey( $string ) {
-               MediaWiki\suppressWarnings();
-               $key = $this->primaryCollator->getSortKey( $string ) . '';
-               MediaWiki\restoreWarnings();
-               return $key;
-       }
-
-       function getFirstLetter( $string ) {
-               $string = strval( $string );
-               if ( $string === '' ) {
-                       return '';
-               }
-
-               // Check for CJK
-               $firstChar = mb_substr( $string, 0, 1, 'UTF-8' );
-               if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
-                       return $firstChar;
-               }
-
-               $sortKey = $this->getPrimarySortKey( $string );
-
-               // Do a binary search to find the correct letter to sort under
-               $min = ArrayUtils::findLowerBound(
-                       [ $this, 'getSortKeyByLetterIndex' ],
-                       $this->getFirstLetterCount(),
-                       'strcmp',
-                       $sortKey );
-
-               if ( $min === false ) {
-                       // Before the first letter
-                       return '';
-               }
-               return $this->getLetterByIndex( $min );
-       }
-
-       function getFirstLetterData() {
-               if ( $this->firstLetterData !== null ) {
-                       return $this->firstLetterData;
-               }
-
-               $cache = wfGetCache( CACHE_ANYTHING );
-               $cacheKey = wfMemcKey(
-                       'first-letters',
-                       $this->locale,
-                       $this->digitTransformLanguage->getCode(),
-                       self::getICUVersion()
-               );
-               $cacheEntry = $cache->get( $cacheKey );
-
-               if ( $cacheEntry && isset( $cacheEntry['version'] )
-                       && $cacheEntry['version'] == self::FIRST_LETTER_VERSION
-               ) {
-                       $this->firstLetterData = $cacheEntry;
-                       return $this->firstLetterData;
-               }
-
-               // Generate data from serialized data file
-
-               if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
-                       $letters = wfGetPrecompiledData( "first-letters-root.ser" );
-                       // Append additional characters
-                       $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
-                       // Remove unnecessary ones, if any
-                       if ( isset( self::$tailoringFirstLetters['-' . $this->locale] ) ) {
-                               $letters = array_diff( $letters, self::$tailoringFirstLetters['-' . $this->locale] );
-                       }
-                       // Apply digit transforms
-                       $digits = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ];
-                       $letters = array_diff( $letters, $digits );
-                       foreach ( $digits as $digit ) {
-                               $letters[] = $this->digitTransformLanguage->formatNum( $digit, true );
-                       }
-               } else {
-                       $letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" );
-                       if ( $letters === false ) {
-                               throw new MWException( "MediaWiki does not support ICU locale " .
-                                       "\"{$this->locale}\"" );
-                       }
-               }
-
-               /* Sort the letters.
-                *
-                * It's impossible to have the precompiled data file properly sorted,
-                * because the sort order changes depending on ICU version. If the
-                * array is not properly sorted, the binary search will return random
-                * results.
-                *
-                * We also take this opportunity to remove primary collisions.
-                */
-               $letterMap = [];
-               foreach ( $letters as $letter ) {
-                       $key = $this->getPrimarySortKey( $letter );
-                       if ( isset( $letterMap[$key] ) ) {
-                               // Primary collision
-                               // Keep whichever one sorts first in the main collator
-                               if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) {
-                                       $letterMap[$key] = $letter;
-                               }
-                       } else {
-                               $letterMap[$key] = $letter;
-                       }
-               }
-               ksort( $letterMap, SORT_STRING );
-
-               /* Remove duplicate prefixes. Basically if something has a sortkey
-                * which is a prefix of some other sortkey, then it is an
-                * expansion and probably should not be considered a section
-                * header.
-                *
-                * For example 'þ' is sometimes sorted as if it is the letters
-                * 'th'. Other times it is its own primary element. Another
-                * example is '₨'. Sometimes its a currency symbol. Sometimes it
-                * is an 'R' followed by an 's'.
-                *
-                * Additionally an expanded element should always sort directly
-                * after its first element due to they way sortkeys work.
-                *
-                * UCA sortkey elements are of variable length but no collation
-                * element should be a prefix of some other element, so I think
-                * this is safe. See:
-                * - https://ssl.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
-                * - http://site.icu-project.org/design/collation/uca-weight-allocation
-                *
-                * Additionally, there is something called primary compression to
-                * worry about. Basically, if you have two primary elements that
-                * are more than one byte and both start with the same byte then
-                * the first byte is dropped on the second primary. Additionally
-                * either \x03 or \xFF may be added to mean that the next primary
-                * does not start with the first byte of the first primary.
-                *
-                * This shouldn't matter much, as the first primary is not
-                * changed, and that is what we are comparing against.
-                *
-                * tl;dr: This makes some assumptions about how icu implements
-                * collations. It seems incredibly unlikely these assumptions
-                * will change, but nonetheless they are assumptions.
-                */
-
-               $prev = false;
-               $duplicatePrefixes = [];
-               foreach ( $letterMap as $key => $value ) {
-                       // Remove terminator byte. Otherwise the prefix
-                       // comparison will get hung up on that.
-                       $trimmedKey = rtrim( $key, "\0" );
-                       if ( $prev === false || $prev === '' ) {
-                               $prev = $trimmedKey;
-                               // We don't yet have a collation element
-                               // to compare against, so continue.
-                               continue;
-                       }
-
-                       // Due to the fact the array is sorted, we only have
-                       // to compare with the element directly previous
-                       // to the current element (skipping expansions).
-                       // An element "X" will always sort directly
-                       // before "XZ" (Unless we have "XY", but we
-                       // do not update $prev in that case).
-                       if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
-                               $duplicatePrefixes[] = $key;
-                               // If this is an expansion, we don't want to
-                               // compare the next element to this element,
-                               // but to what is currently $prev
-                               continue;
-                       }
-                       $prev = $trimmedKey;
-               }
-               foreach ( $duplicatePrefixes as $badKey ) {
-                       wfDebug( "Removing '{$letterMap[$badKey]}' from first letters.\n" );
-                       unset( $letterMap[$badKey] );
-                       // This code assumes that unsetting does not change sort order.
-               }
-               $data = [
-                       'chars' => array_values( $letterMap ),
-                       'keys' => array_keys( $letterMap ),
-                       'version' => self::FIRST_LETTER_VERSION,
-               ];
-
-               // Reduce memory usage before caching
-               unset( $letterMap );
-
-               // Save to cache
-               $this->firstLetterData = $data;
-               $cache->set( $cacheKey, $data, $cache::TTL_WEEK );
-               return $data;
-       }
-
-       function getLetterByIndex( $index ) {
-               if ( $this->firstLetterData === null ) {
-                       $this->getFirstLetterData();
-               }
-               return $this->firstLetterData['chars'][$index];
-       }
-
-       function getSortKeyByLetterIndex( $index ) {
-               if ( $this->firstLetterData === null ) {
-                       $this->getFirstLetterData();
-               }
-               return $this->firstLetterData['keys'][$index];
-       }
-
-       function getFirstLetterCount() {
-               if ( $this->firstLetterData === null ) {
-                       $this->getFirstLetterData();
-               }
-               return count( $this->firstLetterData['chars'] );
-       }
-
-       static function isCjk( $codepoint ) {
-               foreach ( self::$cjkBlocks as $block ) {
-                       if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
-                               return true;
-                       }
-               }
-               return false;
-       }
-
-       /**
-        * Return the version of ICU library used by PHP's intl extension,
-        * or false when the extension is not installed of the version
-        * can't be determined.
-        *
-        * The constant INTL_ICU_VERSION this function refers to isn't really
-        * documented. It is available since PHP 5.3.7 (see PHP bug 54561).
-        * This function will return false on older PHPs.
-        *
-        * @since 1.21
-        * @return string|bool
-        */
-       static function getICUVersion() {
-               return defined( 'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION : false;
-       }
-
-       /**
-        * Return the version of Unicode appropriate for the version of ICU library
-        * currently in use, or false when it can't be determined.
-        *
-        * @since 1.21
-        * @return string|bool
-        */
-       static function getUnicodeVersionForICU() {
-               $icuVersion = IcuCollation::getICUVersion();
-               if ( !$icuVersion ) {
-                       return false;
-               }
-
-               $versionPrefix = substr( $icuVersion, 0, 3 );
-               // Source: http://site.icu-project.org/download
-               $map = [
-                       '50.' => '6.2',
-                       '49.' => '6.1',
-                       '4.8' => '6.0',
-                       '4.6' => '6.0',
-                       '4.4' => '5.2',
-                       '4.2' => '5.1',
-                       '4.0' => '5.1',
-                       '3.8' => '5.0',
-                       '3.6' => '5.0',
-                       '3.4' => '4.1',
-               ];
-
-               if ( isset( $map[$versionPrefix] ) ) {
-                       return $map[$versionPrefix];
-               } else {
-                       return false;
-               }
-       }
-}
-
-/**
- * Workaround for the lack of support of Sorani Kurdish / Central Kurdish language ('ckb') in ICU.
- *
- * Uses the same collation rules as Persian / Farsi ('fa'), but different characters for digits.
- */
-class CollationCkb extends IcuCollation {
-       function __construct() {
-               // This will set $locale and collators, which affect the actual sorting order
-               parent::__construct( 'fa' );
-               // Override the 'fa' language set by parent constructor, which affects #getFirstLetterData()
-               $this->digitTransformLanguage = Language::factory( 'ckb' );
-       }
-}
-
-/**
- * Workaround for incorrect collation of Estonian language ('et') in ICU (bug 54168).
- *
- * 'W' and 'V' should not be considered the same letter for the purposes of collation in modern
- * Estonian. We work around this by replacing 'W' and 'w' with 'ᴡ' U+1D21 'LATIN LETTER SMALL
- * CAPITAL W' for sortkey generation, which is collated like 'W' and is not tailored to have the
- * same primary weight as 'V' in Estonian.
- */
-class CollationEt extends IcuCollation {
-       function __construct() {
-               parent::__construct( 'et' );
-       }
-
-       private static function mangle( $string ) {
-               return str_replace(
-                       [ 'w', 'W' ],
-                       'ᴡ', // U+1D21 'LATIN LETTER SMALL CAPITAL W'
-                       $string
-               );
-       }
-
-       private static function unmangle( $string ) {
-               // Casing data is lost…
-               return str_replace(
-                       'ᴡ', // U+1D21 'LATIN LETTER SMALL CAPITAL W'
-                       'W',
-                       $string
-               );
-       }
-
-       function getSortKey( $string ) {
-               return parent::getSortKey( self::mangle( $string ) );
-       }
-
-       function getFirstLetter( $string ) {
-               return self::unmangle( parent::getFirstLetter( self::mangle( $string ) ) );
-       }
-}
index 74c2997..acd5262 100644 (file)
@@ -36,9 +36,6 @@ class SiteStats {
        /** @var int[] */
        private static $pageCount = [];
 
-       /** @var int[] */
-       private static $groupMemberCounts = [];
-
        static function recache() {
                self::load( true );
        }
index 211afda..0b4d048 100644 (file)
@@ -149,7 +149,6 @@ class StubObject {
                }
 
                if ( get_class( $GLOBALS[$this->global] ) != $this->class ) {
-                       $fname = __METHOD__ . '-' . $this->global;
                        $caller = wfGetCaller( $level );
                        if ( ++$recursionLevel > 2 ) {
                                throw new MWException( "Unstub loop detected on call of "
index 386ef8c..2c3f58f 100644 (file)
@@ -17,6 +17,9 @@
  *
  * @file
  */
+use Cdb\Exception;
+use Cdb\Reader;
+use Cdb\Writer;
 
 /**
  * LCStore implementation which stores data as a collection of CDB files in the
  */
 class LCStoreCDB implements LCStore {
 
-       /** @var CdbReader[] */
+       /** @var Reader[] */
        private $readers;
 
-       /** @var CdbWriter */
+       /** @var Writer */
        private $writer;
 
        /** @var string Current language code */
@@ -61,8 +64,8 @@ class LCStoreCDB implements LCStore {
                        $this->readers[$code] = false;
                        if ( file_exists( $fileName ) ) {
                                try {
-                                       $this->readers[$code] = CdbReader::open( $fileName );
-                               } catch ( CdbException $e ) {
+                                       $this->readers[$code] = Reader::open( $fileName );
+                               } catch ( Exception $e ) {
                                        wfDebug( __METHOD__ . ": unable to open cdb file for reading\n" );
                                }
                        }
@@ -74,8 +77,8 @@ class LCStoreCDB implements LCStore {
                        $value = false;
                        try {
                                $value = $this->readers[$code]->get( $key );
-                       } catch ( CdbException $e ) {
-                               wfDebug( __METHOD__ . ": CdbException caught, error message was "
+                       } catch ( Exception $e ) {
+                               wfDebug( __METHOD__ . ": \Cdb\Exception caught, error message was "
                                        . $e->getMessage() . "\n" );
                        }
                        if ( $value === false ) {
@@ -100,8 +103,8 @@ class LCStoreCDB implements LCStore {
                }
 
                try {
-                       $this->writer = CdbWriter::open( $this->getFileName( $code ) );
-               } catch ( CdbException $e ) {
+                       $this->writer = Writer::open( $this->getFileName( $code ) );
+               } catch ( Exception $e ) {
                        throw new MWException( $e->getMessage() );
                }
                $this->currentLang = $code;
@@ -111,7 +114,7 @@ class LCStoreCDB implements LCStore {
                // Close the writer
                try {
                        $this->writer->close();
-               } catch ( CdbException $e ) {
+               } catch ( Exception $e ) {
                        throw new MWException( $e->getMessage() );
                }
                $this->writer = null;
@@ -125,7 +128,7 @@ class LCStoreCDB implements LCStore {
                }
                try {
                        $this->writer->set( $key, serialize( $value ) );
-               } catch ( CdbException $e ) {
+               } catch ( Exception $e ) {
                        throw new MWException( $e->getMessage() );
                }
        }
diff --git a/includes/collation/Collation.php b/includes/collation/Collation.php
new file mode 100644 (file)
index 0000000..9fb0660
--- /dev/null
@@ -0,0 +1,121 @@
+<?php
+/**
+ * Database row sorting.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Base class for collations: strategies that turn strings into binary
+ * sortkeys and into the "first letter" used for grouping on category pages.
+ *
+ * @since 1.16.3
+ * @author Tim Starling
+ */
+abstract class Collation {
+       /** @var Collation|null Instance created on first call to singleton() */
+       private static $instance;
+
+       /**
+        * Get the collation named by $wgCategoryCollation, creating it on first
+        * use and returning the same object afterwards.
+        *
+        * @since 1.16.3
+        * @throws MWException If $wgCategoryCollation names an unknown collation
+        * @return Collation
+        */
+       public static function singleton() {
+               if ( !self::$instance ) {
+                       global $wgCategoryCollation;
+                       self::$instance = self::factory( $wgCategoryCollation );
+               }
+               return self::$instance;
+       }
+
+       /**
+        * Create a collation object from its configuration name.
+        *
+        * The built-in names are tried first, then any "uca-<locale>" name is
+        * turned into an IcuCollation for that locale, and finally the
+        * 'Collation::factory' hook is given the chance to supply an object.
+        *
+        * @since 1.16.3
+        * @throws MWException If nothing recognises $collationName
+        * @param string $collationName
+        * @return Collation
+        */
+       public static function factory( $collationName ) {
+               switch ( $collationName ) {
+                       case 'uppercase':
+                               return new UppercaseCollation;
+                       case 'identity':
+                               return new IdentityCollation;
+                       case 'uca-default':
+                               return new IcuCollation( 'root' );
+                       case 'xx-uca-ckb':
+                               return new CollationCkb;
+                       case 'xx-uca-et':
+                               return new CollationEt;
+                       default:
+                               $match = [];
+                               if ( preg_match( '/^uca-([a-z@=-]+)$/', $collationName, $match ) ) {
+                                       return new IcuCollation( $match[1] );
+                               }
+
+                               # Provide a mechanism for extensions to hook in.
+                               $collationObject = null;
+                               Hooks::run( 'Collation::factory', [ $collationName, &$collationObject ] );
+
+                               if ( $collationObject instanceof Collation ) {
+                                       return $collationObject;
+                               }
+
+                               // If all else fails...
+                               throw new MWException( __METHOD__ . ": unknown collation type \"$collationName\"" );
+               }
+       }
+
+       /**
+        * Given a string, convert it to a (hopefully short) key that can be used
+        * for efficient sorting.  A binary sort according to the sortkeys
+        * corresponds to a logical sort of the corresponding strings.  Current
+        * code expects that a line feed character should sort before all others, but
+        * has no other particular expectations (and that one can be changed if
+        * necessary).
+        *
+        * @since 1.16.3
+        *
+        * @param string $string UTF-8 string
+        * @return string Binary sortkey
+        */
+       abstract public function getSortKey( $string );
+
+       /**
+        * Given a string, return the logical "first letter" to be used for
+        * grouping on category pages and so on.  This has to be coordinated
+        * carefully with getSortKey(), or else the sorted list might jump
+        * back and forth between the same "initial letters" or other pathological
+        * behavior.  For instance, if you just return the first character, but "a"
+        * sorts the same as "A" based on getSortKey(), then you might get a
+        * list like
+        *
+        * == A ==
+        * * [[Aardvark]]
+        *
+        * == a ==
+        * * [[antelope]]
+        *
+        * == A ==
+        * * [[Ape]]
+        *
+        * etc., assuming for the sake of argument that $wgCapitalLinks is false.
+        *
+        * @since 1.16.3
+        *
+        * @param string $string UTF-8 string
+        * @return string UTF-8 string corresponding to the first letter of input
+        */
+       abstract public function getFirstLetter( $string );
+}
diff --git a/includes/collation/CollationCkb.php b/includes/collation/CollationCkb.php
new file mode 100644 (file)
index 0000000..01a4f7f
--- /dev/null
@@ -0,0 +1,35 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Collation for Sorani Kurdish / Central Kurdish ('ckb'), which ICU does not support directly.
+ *
+ * Sorting follows the Persian / Farsi ('fa') rules; only the characters used for digits differ.
+ *
+ * @since 1.23
+ */
+class CollationCkb extends IcuCollation {
+       public function __construct() {
+               // Borrow the 'fa' locale and collators: these decide the actual sorting order.
+               parent::__construct( 'fa' );
+
+               // The parent constructor also chose 'fa' for digit transformation, which
+               // feeds #getFirstLetterData(); replace just that part with 'ckb'.
+               $ckbLanguage = Language::factory( 'ckb' );
+               $this->digitTransformLanguage = $ckbLanguage;
+       }
+}
diff --git a/includes/collation/CollationEt.php b/includes/collation/CollationEt.php
new file mode 100644 (file)
index 0000000..5dc9fa2
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Estonian ('et') collation that works around ICU treating 'W' like 'V' (bug 54168).
+ *
+ * Modern Estonian collates 'W' and 'V' as different letters. Before a string is handed to
+ * ICU, every 'W' and 'w' is swapped for 'ᴡ' U+1D21 'LATIN LETTER SMALL CAPITAL W', which is
+ * collated like 'W' but is not tailored to share its primary weight with 'V' in Estonian.
+ *
+ * @since 1.24
+ */
+class CollationEt extends IcuCollation {
+       public function __construct() {
+               parent::__construct( 'et' );
+       }
+
+       /**
+        * Replace every 'w' and 'W' with U+1D21.
+        *
+        * @param string $string
+        * @return string
+        */
+       private static function mangle( $string ) {
+               // U+1D21 'LATIN LETTER SMALL CAPITAL W'
+               $smallCapitalW = 'ᴡ';
+               return strtr( $string, [ 'w' => $smallCapitalW, 'W' => $smallCapitalW ] );
+       }
+
+       /**
+        * Turn every U+1D21 back into 'W'. The original casing cannot be recovered.
+        *
+        * @param string $string
+        * @return string
+        */
+       private static function unmangle( $string ) {
+               // U+1D21 'LATIN LETTER SMALL CAPITAL W'
+               return strtr( $string, [ 'ᴡ' => 'W' ] );
+       }
+
+       public function getSortKey( $string ) {
+               $mangled = self::mangle( $string );
+               return parent::getSortKey( $mangled );
+       }
+
+       public function getFirstLetter( $string ) {
+               $mangled = self::mangle( $string );
+               $letter = parent::getFirstLetter( $mangled );
+               return self::unmangle( $letter );
+       }
+}
diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php
new file mode 100644 (file)
index 0000000..0aa1406
--- /dev/null
@@ -0,0 +1,481 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * @since 1.16.3
+ */
+class IcuCollation extends Collation {
+       const FIRST_LETTER_VERSION = 2;
+
+       /** @var Collator */
+       private $primaryCollator;
+
+       /** @var Collator */
+       private $mainCollator;
+
+       /** @var string */
+       private $locale;
+
+       /** @var Language */
+       protected $digitTransformLanguage;
+
+       /** @var array */
+       private $firstLetterData;
+
+       /**
+        * Unified CJK blocks.
+        *
+        * The same definition of a CJK block must be used for both Collation and
+        * generateCollationData.php. These blocks are omitted from the first
+        * letter data, as an optimisation measure and because the default UCA table
+        * is pretty useless for sorting Chinese text anyway. Japanese and Korean
+        * blocks are not included here, because they are smaller and more useful.
+        */
+       private static $cjkBlocks = [
+               [ 0x2E80, 0x2EFF ], // CJK Radicals Supplement
+               [ 0x2F00, 0x2FDF ], // Kangxi Radicals
+               [ 0x2FF0, 0x2FFF ], // Ideographic Description Characters
+               [ 0x3000, 0x303F ], // CJK Symbols and Punctuation
+               [ 0x31C0, 0x31EF ], // CJK Strokes
+               [ 0x3200, 0x32FF ], // Enclosed CJK Letters and Months
+               [ 0x3300, 0x33FF ], // CJK Compatibility
+               [ 0x3400, 0x4DBF ], // CJK Unified Ideographs Extension A
+               [ 0x4E00, 0x9FFF ], // CJK Unified Ideographs
+               [ 0xF900, 0xFAFF ], // CJK Compatibility Ideographs
+               [ 0xFE30, 0xFE4F ], // CJK Compatibility Forms
+               [ 0x20000, 0x2A6DF ], // CJK Unified Ideographs Extension B
+               [ 0x2A700, 0x2B73F ], // CJK Unified Ideographs Extension C
+               [ 0x2B740, 0x2B81F ], // CJK Unified Ideographs Extension D
+               [ 0x2F800, 0x2FA1F ], // CJK Compatibility Ideographs Supplement
+       ];
+
+       /**
+        * Additional characters (or character groups) to be considered separate
+        * letters for given languages, or to be removed from the list of such
+        * letters (denoted by keys starting with '-').
+        *
+        * These are additions to (or subtractions from) the data stored in the
+        * first-letters-root.ser file (which among others includes full basic latin,
+        * cyrillic and greek alphabets).
+        *
+        * "Separate letter" is a letter that would have a separate heading/section
+        * for it in a dictionary or a phone book in this language. This data isn't
+        * used for sorting (the ICU library handles that), only for deciding which
+        * characters (or character groups) to use as headings.
+        *
+        * Initially generated based on the primary level of Unicode collation
+        * tailorings available at http://developer.mimer.com/charts/tailorings.htm ,
+        * later modified.
+        *
+        * Empty arrays are intended; this signifies that the data for the language is
+        * available and that there are, in fact, no additional letters to consider.
+        */
+       private static $tailoringFirstLetters = [
+               // Verified by native speakers
+               'be' => [ "Ё" ],
+               'be-tarask' => [ "Ё" ],
+               'cy' => [ "Ch", "Dd", "Ff", "Ng", "Ll", "Ph", "Rh", "Th" ],
+               'en' => [],
+               'fa' => [ "آ", "ء", "ه" ],
+               'fi' => [ "Å", "Ä", "Ö" ],
+               'fr' => [],
+               'hu' => [ "Cs", "Dz", "Dzs", "Gy", "Ly", "Ny", "Ö", "Sz", "Ty", "Ü", "Zs" ],
+               'is' => [ "Á", "Ð", "É", "Í", "Ó", "Ú", "Ý", "Þ", "Æ", "Ö", "Å" ],
+               'it' => [],
+               'lv' => [ "Č", "Ģ", "Ķ", "Ļ", "Ņ", "Š", "Ž" ],
+               'pl' => [ "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ],
+               'pt' => [],
+               'ru' => [],
+               'sv' => [ "Å", "Ä", "Ö" ],
+               'sv@collation=standard' => [ "Å", "Ä", "Ö" ],
+               'uk' => [ "Ґ", "Ь" ],
+               'vi' => [ "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ],
+               // Not verified, but likely correct
+               'af' => [],
+               'ast' => [ "Ch", "Ll", "Ñ" ],
+               'az' => [ "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ],
+               'bg' => [],
+               'br' => [ "Ch", "C'h" ],
+               'bs' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
+               'ca' => [],
+               'co' => [],
+               'cs' => [ "Č", "Ch", "Ř", "Š", "Ž" ],
+               'da' => [ "Æ", "Ø", "Å" ],
+               'de' => [],
+               'dsb' => [ "Č", "Ć", "Dź", "Ě", "Ch", "Ł", "Ń", "Ŕ", "Š", "Ś", "Ž", "Ź" ],
+               'el' => [],
+               'eo' => [ "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ],
+               'es' => [ "Ñ" ],
+               'et' => [ "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ], // added W for CollationEt (xx-uca-et)
+               'eu' => [ "Ñ" ],
+               'fo' => [ "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ],
+               'fur' => [ "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ],
+               'fy' => [],
+               'ga' => [],
+               'gd' => [],
+               'gl' => [ "Ch", "Ll", "Ñ" ],
+               'hr' => [ "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ],
+               'hsb' => [ "Č", "Dź", "Ě", "Ch", "Ł", "Ń", "Ř", "Š", "Ć", "Ž" ],
+               'kk' => [ "Ү", "І" ],
+               'kl' => [ "Æ", "Ø", "Å" ],
+               'ku' => [ "Ç", "Ê", "Î", "Ş", "Û" ],
+               'ky' => [ "Ё" ],
+               'la' => [],
+               'lb' => [],
+               'lt' => [ "Č", "Š", "Ž" ],
+               'mk' => [],
+               'mo' => [ "Ă", "Â", "Î", "Ş", "Ţ" ],
+               'mt' => [ "Ċ", "Ġ", "Għ", "Ħ", "Ż" ],
+               'nl' => [],
+               'no' => [ "Æ", "Ø", "Å" ],
+               'oc' => [],
+               'rm' => [],
+               'ro' => [ "Ă", "Â", "Î", "Ş", "Ţ" ],
+               'rup' => [ "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ],
+               'sco' => [],
+               'sk' => [ "Ä", "Č", "Ch", "Ô", "Š", "Ž" ],
+               'sl' => [ "Č", "Š", "Ž" ],
+               'smn' => [ "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ],
+               'sq' => [ "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ],
+               'sr' => [],
+               'tk' => [ "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ],
+               'tl' => [ "Ñ", "Ng" ],
+               'tr' => [ "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ],
+               'tt' => [ "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ],
+               'uz' => [ "Ch", "G'", "Ng", "O'", "Sh" ],
+       ];
+
+       /**
+        * @since 1.16.3
+        */
+       const RECORD_LENGTH = 14;
+
+       /**
+        * @param string $locale ICU locale name; 'root' uses English for digit transformation
+        * @throws MWException If the intl extension is missing or ICU rejects the locale
+        */
+       public function __construct( $locale ) {
+               if ( !extension_loaded( 'intl' ) ) {
+                       throw new MWException( 'An ICU collation was requested, ' .
+                               'but the intl extension is not available.' );
+               }
+
+               $this->locale = $locale;
+
+               if ( $locale === 'root' ) {
+                       $languageCode = 'en';
+               } else {
+                       // Only the part of the locale's name before the '@' names the language
+                       list( $languageCode ) = explode( '@', $locale );
+               }
+               $this->digitTransformLanguage = Language::factory( $languageCode );
+
+               $this->mainCollator = Collator::create( $locale );
+               if ( !$this->mainCollator ) {
+                       throw new MWException( "Invalid ICU locale specified for collation: $locale" );
+               }
+
+               // Second collator for the same locale, restricted to primary strength
+               $primary = Collator::create( $locale );
+               $primary->setStrength( Collator::PRIMARY );
+               $this->primaryCollator = $primary;
+       }
+
+       /**
+        * Get the sortkey for a string from the main collator.
+        *
+        * @param string $string
+        * @return string Sortkey as returned by Collator::getSortKey(), forced to a string
+        */
+       public function getSortKey( $string ) {
+               // intl extension produces non null-terminated
+               // strings. Appending '' fixes it so that it doesn't generate
+               // a warning on each access in debug php.
+               MediaWiki\suppressWarnings();
+               $key = $this->mainCollator->getSortKey( $string ) . '';
+               MediaWiki\restoreWarnings();
+               return $key;
+       }
+
+       /**
+        * Get the sortkey for a string from the primary-strength collator
+        * (the one the constructor sets to Collator::PRIMARY).
+        *
+        * @param string $string
+        * @return string Sortkey as returned by Collator::getSortKey(), forced to a string
+        */
+       public function getPrimarySortKey( $string ) {
+               // Warnings are suppressed and '' is appended in the same way as in getSortKey()
+               MediaWiki\suppressWarnings();
+               $key = $this->primaryCollator->getSortKey( $string ) . '';
+               MediaWiki\restoreWarnings();
+               return $key;
+       }
+
+       /**
+        * Find the first-letter heading that a string sorts under.
+        *
+        * @param string $string UTF-8 string
+        * @return string The string's own first character when that is in a CJK block,
+        *  otherwise the matching entry of the first-letter data; '' for an empty
+        *  string or one that sorts before every known letter
+        */
+       public function getFirstLetter( $string ) {
+               $text = strval( $string );
+               if ( $text === '' ) {
+                       return '';
+               }
+
+               // CJK characters are not in the first-letter data; each is its own heading
+               $leadingChar = mb_substr( $text, 0, 1, 'UTF-8' );
+               $isMultiByte = ord( $leadingChar ) > 0x7f;
+               if ( $isMultiByte && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $leadingChar ) ) ) {
+                       return $leadingChar;
+               }
+
+               $primaryKey = $this->getPrimarySortKey( $text );
+
+               // Binary search over the letters' sortkeys for the one this string falls under
+               $letterIndex = ArrayUtils::findLowerBound(
+                       [ $this, 'getSortKeyByLetterIndex' ],
+                       $this->getFirstLetterCount(),
+                       'strcmp',
+                       $primaryKey
+               );
+
+               // false means the string sorts before the first letter
+               return $letterIndex === false ? '' : $this->getLetterByIndex( $letterIndex );
+       }
+
+       /**
+        * Build, or fetch from cache, the sorted list of first letters and their
+        * primary sortkeys for this collation.
+        *
+        * The result is kept on the object and stored in the CACHE_ANYTHING cache
+        * (TTL_WEEK) under a key made of the locale, the digit transform language
+        * code and the ICU version. A cached entry whose 'version' does not equal
+        * FIRST_LETTER_VERSION is ignored and the data is regenerated.
+        *
+        * @since 1.16.3
+        * @throws MWException If the locale has neither a tailoring entry nor a
+        *  precompiled first-letters file
+        * @return array Map with 'chars' (the letters), 'keys' (their primary
+        *  sortkeys, in the same order) and 'version'
+        */
+       public function getFirstLetterData() {
+               if ( $this->firstLetterData !== null ) {
+                       return $this->firstLetterData;
+               }
+
+               $cache = wfGetCache( CACHE_ANYTHING );
+               $cacheKey = wfMemcKey(
+                       'first-letters',
+                       $this->locale,
+                       $this->digitTransformLanguage->getCode(),
+                       self::getICUVersion()
+               );
+               $cacheEntry = $cache->get( $cacheKey );
+
+               if ( $cacheEntry && isset( $cacheEntry['version'] )
+                       && $cacheEntry['version'] == self::FIRST_LETTER_VERSION
+               ) {
+                       $this->firstLetterData = $cacheEntry;
+                       return $this->firstLetterData;
+               }
+
+               // Generate data from serialized data file
+
+               if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
+                       $letters = wfGetPrecompiledData( "first-letters-root.ser" );
+                       // Append additional characters
+                       $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
+                       // Remove unnecessary ones, if any
+                       if ( isset( self::$tailoringFirstLetters['-' . $this->locale] ) ) {
+                               $letters = array_diff( $letters, self::$tailoringFirstLetters['-' . $this->locale] );
+                       }
+                       // Apply digit transforms
+                       $digits = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ];
+                       $letters = array_diff( $letters, $digits );
+                       foreach ( $digits as $digit ) {
+                               $letters[] = $this->digitTransformLanguage->formatNum( $digit, true );
+                       }
+               } else {
+                       // No tailoring entry: the locale needs its own precompiled file
+                       $letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" );
+                       if ( $letters === false ) {
+                               throw new MWException( "MediaWiki does not support ICU locale " .
+                                       "\"{$this->locale}\"" );
+                       }
+               }
+
+               /* Sort the letters.
+                *
+                * It's impossible to have the precompiled data file properly sorted,
+                * because the sort order changes depending on ICU version. If the
+                * array is not properly sorted, the binary search will return random
+                * results.
+                *
+                * We also take this opportunity to remove primary collisions.
+                */
+               $letterMap = [];
+               foreach ( $letters as $letter ) {
+                       $key = $this->getPrimarySortKey( $letter );
+                       if ( isset( $letterMap[$key] ) ) {
+                               // Primary collision
+                               // Keep whichever one sorts first in the main collator
+                               if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) {
+                                       $letterMap[$key] = $letter;
+                               }
+                       } else {
+                               $letterMap[$key] = $letter;
+                       }
+               }
+               ksort( $letterMap, SORT_STRING );
+
+               /* Remove duplicate prefixes. Basically if something has a sortkey
+                * which is a prefix of some other sortkey, then it is an
+                * expansion and probably should not be considered a section
+                * header.
+                *
+                * For example 'þ' is sometimes sorted as if it is the letters
+                * 'th'. Other times it is its own primary element. Another
+                * example is '₨'. Sometimes it's a currency symbol. Sometimes it
+                * is an 'R' followed by an 's'.
+                *
+                * Additionally an expanded element should always sort directly
+                * after its first element due to the way sortkeys work.
+                *
+                * UCA sortkey elements are of variable length but no collation
+                * element should be a prefix of some other element, so I think
+                * this is safe. See:
+                * - https://ssl.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
+                * - http://site.icu-project.org/design/collation/uca-weight-allocation
+                *
+                * Additionally, there is something called primary compression to
+                * worry about. Basically, if you have two primary elements that
+                * are more than one byte and both start with the same byte then
+                * the first byte is dropped on the second primary. Additionally
+                * either \x03 or \xFF may be added to mean that the next primary
+                * does not start with the first byte of the first primary.
+                *
+                * This shouldn't matter much, as the first primary is not
+                * changed, and that is what we are comparing against.
+                *
+                * tl;dr: This makes some assumptions about how icu implements
+                * collations. It seems incredibly unlikely these assumptions
+                * will change, but nonetheless they are assumptions.
+                */
+
+               $prev = false;
+               $duplicatePrefixes = [];
+               foreach ( $letterMap as $key => $value ) {
+                       // Remove terminator byte. Otherwise the prefix
+                       // comparison will get hung up on that.
+                       $trimmedKey = rtrim( $key, "\0" );
+                       if ( $prev === false || $prev === '' ) {
+                               $prev = $trimmedKey;
+                               // We don't yet have a collation element
+                               // to compare against, so continue.
+                               continue;
+                       }
+
+                       // Due to the fact the array is sorted, we only have
+                       // to compare with the element directly previous
+                       // to the current element (skipping expansions).
+                       // An element "X" will always sort directly
+                       // before "XZ" (Unless we have "XY", but we
+                       // do not update $prev in that case).
+                       if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
+                               $duplicatePrefixes[] = $key;
+                               // If this is an expansion, we don't want to
+                               // compare the next element to this element,
+                               // but to what is currently $prev
+                               continue;
+                       }
+                       $prev = $trimmedKey;
+               }
+               foreach ( $duplicatePrefixes as $badKey ) {
+                       wfDebug( "Removing '{$letterMap[$badKey]}' from first letters.\n" );
+                       unset( $letterMap[$badKey] );
+                       // This code assumes that unsetting does not change sort order.
+               }
+               // 'chars' and 'keys' are parallel lists: index i of one matches index i of the other
+               $data = [
+                       'chars' => array_values( $letterMap ),
+                       'keys' => array_keys( $letterMap ),
+                       'version' => self::FIRST_LETTER_VERSION,
+               ];
+
+               // Reduce memory usage before caching
+               unset( $letterMap );
+
+               // Save to cache
+               $this->firstLetterData = $data;
+               $cache->set( $cacheKey, $data, $cache::TTL_WEEK );
+               return $data;
+       }
+
+       /**
+        * Look up a first letter by its position in the first-letter data,
+        * generating that data first if it has not been loaded yet.
+        *
+        * @since 1.16.3
+        * @param int $index Position in the 'chars' list
+        * @return string
+        */
+       public function getLetterByIndex( $index ) {
+               $data = $this->firstLetterData !== null
+                       ? $this->firstLetterData
+                       : $this->getFirstLetterData();
+               return $data['chars'][$index];
+       }
+
+       /**
+        * Look up the sortkey of a first letter by its position in the first-letter
+        * data, generating that data first if it has not been loaded yet.
+        *
+        * @since 1.16.3
+        * @param int $index Position in the 'keys' list
+        * @return string
+        */
+       public function getSortKeyByLetterIndex( $index ) {
+               $data = $this->firstLetterData !== null
+                       ? $this->firstLetterData
+                       : $this->getFirstLetterData();
+               return $data['keys'][$index];
+       }
+
+       /**
+        * Count the entries in the first-letter data, generating that data first
+        * if it has not been loaded yet.
+        *
+        * @since 1.16.3
+        * @return int
+        */
+       public function getFirstLetterCount() {
+               $data = $this->firstLetterData !== null
+                       ? $this->firstLetterData
+                       : $this->getFirstLetterData();
+               return count( $data['chars'] );
+       }
+
+       /**
+        * Tell whether a code point lies inside one of the blocks listed in self::$cjkBlocks.
+        *
+        * @since 1.16.3
+        * @param int $codepoint
+        * @return bool
+        */
+       public static function isCjk( $codepoint ) {
+               foreach ( self::$cjkBlocks as $range ) {
+                       // Each entry is an inclusive [ first, last ] pair
+                       list( $first, $last ) = $range;
+                       if ( $codepoint >= $first && $codepoint <= $last ) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       /**
+        * Return the version of ICU library used by PHP's intl extension,
+        * or false when the extension is not installed or the version
+        * can't be determined.
+        *
+        * The constant INTL_ICU_VERSION this function refers to isn't really
+        * documented. It is available since PHP 5.3.7 (see PHP bug 54561).
+        * This function will return false on older PHPs.
+        *
+        * @since 1.21
+        * @return string|bool
+        */
+       public static function getICUVersion() {
+               return defined( 'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION : false;
+       }
+
+       /**
+        * Return the version of Unicode appropriate for the version of ICU library
+        * currently in use, or false when it can't be determined.
+        *
+        * Only the first three characters of the ICU version are looked up, so an
+        * ICU release that is missing from the table below yields false.
+        *
+        * @since 1.21
+        * @return string|bool
+        */
+       public static function getUnicodeVersionForICU() {
+               $icuVersion = self::getICUVersion();
+               if ( !$icuVersion ) {
+                       return false;
+               }
+
+               // The map keys are three-character prefixes such as '50.' or '4.8'
+               $versionPrefix = substr( $icuVersion, 0, 3 );
+               // Source: http://site.icu-project.org/download
+               $map = [
+                       '50.' => '6.2',
+                       '49.' => '6.1',
+                       '4.8' => '6.0',
+                       '4.6' => '6.0',
+                       '4.4' => '5.2',
+                       '4.2' => '5.1',
+                       '4.0' => '5.1',
+                       '3.8' => '5.0',
+                       '3.6' => '5.0',
+                       '3.4' => '4.1',
+               ];
+
+               return isset( $map[$versionPrefix] ) ? $map[$versionPrefix] : false;
+       }
+}
diff --git a/includes/collation/IdentityCollation.php b/includes/collation/IdentityCollation.php
new file mode 100644 (file)
index 0000000..46e7f38
--- /dev/null
@@ -0,0 +1,44 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Collation class that's essentially a no-op: the sort key is the string
+ * itself, so sorting follows its binary value, like it did before 1.17.
+ *
+ * @since 1.18
+ */
+class IdentityCollation extends Collation {
+       /** The sort key is the input string itself, unchanged. */
+       public function getSortKey( $string ) {
+               return $string;
+       }
+
+       /** First character of $string, skipping one leading NUL byte. */
+       public function getFirstLetter( $string ) {
+               global $wgContLang;
+               // The NUL check is copied from UppercaseCollation; it is unclear
+               // when a leading NUL actually occurs. isset() guards the empty
+               // string, where reading offset 0 would raise a PHP notice.
+               if ( isset( $string[0] ) && $string[0] === "\0" ) {
+                       $string = substr( $string, 1 );
+               }
+               return $wgContLang->firstChar( $string );
+       }
+}
diff --git a/includes/collation/UppercaseCollation.php b/includes/collation/UppercaseCollation.php
new file mode 100644 (file)
index 0000000..92a4c3b
--- /dev/null
@@ -0,0 +1,44 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 1.16.3
+ *
+ * @file
+ */
+
+class UppercaseCollation extends Collation {
+       /** @var Language English Language object, used for UTF-8 uppercasing */
+       private $lang;
+
+       public function __construct() {
+               $this->lang = Language::factory( 'en' );
+       }
+
+       /** The sort key is $string as uppercased by the Language object. */
+       public function getSortKey( $string ) {
+               return $this->lang->uc( $string );
+       }
+
+       /** Uppercased first character of $string, skipping one leading NUL byte. */
+       public function getFirstLetter( $string ) {
+               // isset() avoids a PHP notice when $string is empty
+               if ( isset( $string[0] ) && $string[0] === "\0" ) {
+                       $string = substr( $string, 1 );
+               }
+               return $this->lang->ucfirst( $this->lang->firstChar( $string ) );
+       }
+}
index c385c38..b61cae7 100644 (file)
@@ -55,11 +55,3 @@ abstract class EventRelayer {
        abstract protected function doNotify( $channel, array $events );
 }
 
-/**
- * No-op class for publishing messages into a PubSub system
- */
-class EventRelayerNull extends EventRelayer {
-       public function doNotify( $channel, array $events ) {
-               return true;
-       }
-}
diff --git a/includes/libs/eventrelayer/EventRelayerNull.php b/includes/libs/eventrelayer/EventRelayerNull.php
new file mode 100644 (file)
index 0000000..b8ec55f
--- /dev/null
@@ -0,0 +1,29 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/** No-op class for publishing messages into a PubSub system. */
+class EventRelayerNull extends EventRelayer {
+       /** Ignores $channel and $events; always returns true. */
+       public function doNotify( $channel, array $events ) {
+               // Nothing is relayed anywhere, hence "no-op"
+               return true;
+       }
+}
index b055d16..4da41c8 100644 (file)
@@ -115,7 +115,7 @@ class BitmapHandler extends TransformationalImageHandler {
        protected function transformImageMagick( $image, $params ) {
                # use ImageMagick
                global $wgSharpenReductionThreshold, $wgSharpenParameter, $wgMaxAnimatedGifArea,
-                       $wgImageMagickTempDir, $wgImageMagickConvertCommand, $wgMaxInterlacingAreas;
+                       $wgImageMagickTempDir, $wgImageMagickConvertCommand;
 
                $quality = [];
                $sharpen = [];
index 6a01e87..35e885f 100644 (file)
@@ -63,9 +63,9 @@ class WebPHandler extends BitmapHandler {
                                return self::METADATA_GOOD;
                }
 
-               wfSuppressWarnings();
+               MediaWiki\suppressWarnings();
                $data = unserialize( $metadata );
-               wfRestoreWarnings();
+               MediaWiki\restoreWarnings();
 
                if ( !$data || !is_array( $data ) ) {
                                wfDebug( __METHOD__ . " invalid WebP metadata\n" );
@@ -234,9 +234,9 @@ class WebPHandler extends BitmapHandler {
                        $metadata = $file->getMetadata();
                }
 
-               wfSuppressWarnings();
+               MediaWiki\suppressWarnings();
                $metadata = unserialize( $metadata );
-               wfRestoreWarnings();
+               MediaWiki\restoreWarnings();
 
                if ( $metadata == false ) {
                        return false;
index 90ace7b..fb153fc 100644 (file)
@@ -342,10 +342,10 @@ class SpecialPage {
                        return [];
                }
 
-               $search = SearchEngine::create();
-               $search->setLimitOffset( $limit, $offset );
-               $search->setNamespaces( [] );
-               $result = $search->defaultPrefixSearch( $search );
+               $searchEngine = SearchEngine::create();
+               $searchEngine->setLimitOffset( $limit, $offset );
+               $searchEngine->setNamespaces( [] );
+               $result = $searchEngine->defaultPrefixSearch( $search );
                return array_map( function( Title $t ) {
                        return $t->getPrefixedText();
                }, $result );
index bcba190..1027f1f 100644 (file)
@@ -84,9 +84,6 @@ class SpecialBotPasswords extends FormSpecialPage {
        }
 
        protected function getFormFields() {
-               $user = $this->getUser();
-               $request = $this->getRequest();
-
                $fields = [];
 
                if ( $this->par !== null ) {
index c352927..86f1e20 100644 (file)
@@ -36,8 +36,6 @@ class SpecialStatistics extends SpecialPage {
        }
 
        public function execute( $par ) {
-               $miserMode = $this->getConfig()->get( 'MiserMode' );
-
                $this->setHeaders();
                $this->getOutput()->addModuleStyles( 'mediawiki.special' );
 
index c4dfd8f..60f94a5 100644 (file)
@@ -70,7 +70,7 @@ class McTest extends Maintenance {
                                $server # output channel
                        );
 
-                       $mcc = new MemCachedClientforWiki( [
+                       $mcc = new MemcachedClient( [
                                'persistant' => true,
                                'timeout' => $wgMemCachedTimeout
                        ] );
index a9a1e7a..938397a 100644 (file)
@@ -47,9 +47,9 @@ class AvroFormatterTest extends MediaWikiTestCase {
                // disable conversion of notices
                PHPUnit_Framework_Error_Notice::$enabled = false;
                // have to keep the user notice from being output
-               wfSuppressWarnings();
+               \MediaWiki\suppressWarnings();
                $res = $formatter->format( [ 'channel' => 'marty' ] );
-               wfRestoreWarnings();
+               \MediaWiki\restoreWarnings();
                PHPUnit_Framework_Error_Notice::$enabled = $noticeEnabled;
                $this->assertNull( $res );
        }