X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FCollation.php;h=87afc103a6b8f7f67751f3ff577a505fac96a7ec;hb=30ba4c00ebfa163b4ae2304f1db83d6dc56e1598;hp=3cc7902855d1210962cc926308d9bf80b4c07863;hpb=29ab088e2fb938b0b3330bc4d4891a155e9420d2;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Collation.php b/includes/Collation.php index 3cc7902855..87afc103a6 100644 --- a/includes/Collation.php +++ b/includes/Collation.php @@ -48,8 +48,12 @@ abstract class Collation { case 'uca-default': return new IcuCollation( 'root' ); default: - # Provide a mechanism for extensions to hook in. + $match = array(); + if ( preg_match( '/^uca-([a-z-]+)$/', $collationName, $match ) ) { + return new IcuCollation( $match[1] ); + } + # Provide a mechanism for extensions to hook in. $collationObject = null; wfRunHooks( 'Collation::factory', array( $collationName, &$collationObject ) ); @@ -176,6 +180,87 @@ class IcuCollation extends Collation { array( 0x2F800, 0x2FA1F ), // CJK Compatibility Ideographs Supplement ); + /** + * Additional characters (or character groups) to be considered first-letters + * + * Generated based on the primary level of Unicode collation tailorings + * available at http://developer.mimer.com/charts/tailorings.htm . + * + * Empty arrays are intended; this signifies that the data for the language is + * available and that there are, in fact, no additional letters to consider. 
+ */ + static $tailoringFirstLetters = array( + // Verified by native speakers + 'pl' => array( "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ), + 'fi' => array( "Å", "Ä", "Ö" ), + // Not verified, but likely correct + 'af' => array(), + 'ast' => array( "CH", "LL", "Ñ" ), + 'az' => array( "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ), + 'be' => array( "Ё" ), + 'bg' => array(), + 'br' => array( "CH", "C'H" ), + 'bs' => array( "Č", "Ć", "DŽ", "Đ", "LJ", "NJ", "Š", "Ž" ), + 'ca' => array(), + 'co' => array(), + 'cs' => array( "Č", "CH", "Ř", "Š", "Ž" ), + 'cy' => array( "CH", "DD", "FF", "NG", "LL", "PH", "RH", "TH" ), + 'da' => array( "Æ", "Ø", "Å" ), + 'de' => array(), + 'dsb' => array( "Č", "Ć", "DŹ", "Ě", "CH", "Ł", "Ń", "Ŕ", "Š", "Ś", "Ž", "Ź" ), + 'el' => array(), + 'en' => array(), + 'eo' => array( "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ), + 'es' => array( "Ñ" ), + 'et' => array( "Š", "Ž", "Õ", "Ä", "Ö", "Ü" ), + 'eu' => array( "Ñ" ), + 'fo' => array( "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ), + 'fr' => array(), + 'fur' => array( "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ), + 'fy' => array(), + 'ga' => array(), + 'gd' => array(), + 'gl' => array( "CH", "LL", "Ñ" ), + 'hr' => array( "Č", "Ć", "DŽ", "Đ", "LJ", "NJ", "Š", "Ž" ), + 'hsb' => array( "Č", "DŹ", "Ě", "CH", "Ł", "Ń", "Ř", "Š", "Ć", "Ž" ), + 'hu' => array( "CS", "DZ", "DZS", "GY", "LY", "NY", "Ö", "SZ", "TY", "Ü", "ZS" ), + 'is' => array( "Á", "Ð", "É", "Í", "Ó", "Ú", "Ý", "Þ", "Æ", "Ö", "Å" ), + 'it' => array(), + 'kk' => array( "Ү", "І" ), + 'kl' => array( "Æ", "Ø", "Å" ), + 'ku' => array( "Ç", "Ê", "Î", "Ş", "Û" ), + 'ky' => array( "Ё" ), + 'la' => array(), + 'lb' => array(), + 'lt' => array( "Č", "Š", "Ž" ), + 'lv' => array( "Č", "Ģ", "Ķ", "Ļ", "Ņ", "Š", "Ž" ), + 'mk' => array(), + 'mo' => array( "Ă", "Â", "Î", "Ş", "Ţ" ), + 'mt' => array( "Ċ", "Ġ", "GĦ", "Ħ", "Ż" ), + 'nl' => array(), + 'no' => array( "Æ", "Ø", "Å" ), + 'oc' => array(), + 'pt' => array(), + 'rm' => array(), + 'ro' => array( "Ă", "Â", "Î", "Ş", "Ţ" ), + 'ru' 
=> array(), + 'rup' => array( "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ), + 'sco' => array(), + 'sk' => array( "Ä", "Č", "CH", "Ô", "Š", "Ž" ), + 'sl' => array( "Č", "Š", "Ž" ), + 'smn' => array( "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ), + 'sq' => array( "Ç", "DH", "Ë", "GJ", "LL", "NJ", "RR", "SH", "TH", "XH", "ZH" ), + 'sr' => array(), + 'sv' => array( "Å", "Ä", "Ö" ), + 'tk' => array( "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ), + 'tl' => array( "Ñ", "NG" ), /* 'fil' in the data source */ + 'tr' => array( "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ), + 'tt' => array( "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ), + 'uk' => array( "Ґ", "Ь" ), + 'uz' => array( "CH", "G'", "NG", "O'", "SH" ), + 'vi' => array( "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ), + ); + const RECORD_LENGTH = 14; function __construct( $locale ) { @@ -256,10 +341,15 @@ class IcuCollation extends Collation { // Generate data from serialized data file - $letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" ); - if ( $letters === false ) { - throw new MWException( "MediaWiki does not support ICU locale " . - "\"{$this->locale}\"" ); + if ( isset ( self::$tailoringFirstLetters[$this->locale] ) ) { + $letters = wfGetPrecompiledData( "first-letters-root.ser" ); + $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] ); + } else { + $letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" ); + if ( $letters === false ) { + throw new MWException( "MediaWiki does not support ICU locale " . + "\"{$this->locale}\"" ); + } } // Sort the letters. @@ -374,5 +464,55 @@ class IcuCollation extends Collation { } return false; } -} + /** + * Return the version of ICU library used by PHP's intl extension, + * or false when the extension is not installed or the version + * can't be determined. + * + * The constant INTL_ICU_VERSION this function refers to isn't really + * documented. It is available since PHP 5.3.7 (see PHP bug 54561). + * This function will return false on older PHPs. 
+ * + * @since 1.21 + * @return string|false + */ + static function getICUVersion() { + return defined( 'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION : false; + } + + /** + * Return the version of Unicode appropriate for the version of ICU library + * currently in use, or false when it can't be determined. + * + * @since 1.21 + * @return string|false + */ + static function getUnicodeVersionForICU() { + $icuVersion = IcuCollation::getICUVersion(); + if ( !$icuVersion ) { + return false; + } + + $versionPrefix = substr( $icuVersion, 0, 3 ); + // Source: http://site.icu-project.org/download + $map = array( + '50.' => '6.2', + '49.' => '6.1', + '4.8' => '6.0', + '4.6' => '6.0', + '4.4' => '5.2', + '4.2' => '5.1', + '4.0' => '5.1', + '3.8' => '5.0', + '3.6' => '5.0', + '3.4' => '4.1', + ); + + if ( isset( $map[$versionPrefix] ) ) { + return $map[$versionPrefix]; + } else { + return false; + } + } +}