From: Santhosh Thottingal Date: Tue, 23 Jul 2019 06:50:52 +0000 (+0530) Subject: Update LanguageTrTest::testDottedAndDotlessI for PHP 7.3 X-Git-Tag: 1.31.4~15 X-Git-Url: http://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=d6ba90904c15d0fd8f1b84820a7d99678a1fd025 Update LanguageTrTest::testDottedAndDotlessI for PHP 7.3 PHP 7.3+ uses Unicode CaseFolding.txt for case mappings. For Turkic languages (tr, az) the dotted i is given as a special case, and we need to implement it specifically for tr and az. Updated the documentation and refactored the lcfirst and ucfirst methods to use arrays containing the above-mentioned special cases. Bug: T207100 Change-Id: I317f2ca66b0adeaa79bc0f9e3dea5edfcd5e4693 (cherry picked from commit 27b424066453d59eeceda48a43d51e4915da960d) --- diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31 index ba77da38b0..b3b1cc0412 100644 --- a/RELEASE-NOTES-1.31 +++ b/RELEASE-NOTES-1.31 @@ -13,6 +13,7 @@ This is a maintenance release of the MediaWiki 1.31 branch. * (T225496) Detect APC for MainCacheType in CLI installer. * (T226766) Remove jetbrains/phpstorm-stubs from composer dev dependancies. * (T202211) Fix SQLite patch-(image|page|template)links-fix-pk.sql column order. +* (T207100) Updated LanguageTr for dotted and dotless I in PHP 7.3. == MediaWiki 1.31.2 == diff --git a/languages/classes/LanguageTr.php b/languages/classes/LanguageTr.php index 49ee88ac8d..03790fa530 100644 --- a/languages/classes/LanguageTr.php +++ b/languages/classes/LanguageTr.php @@ -24,21 +24,33 @@ /** * Turkish (Türkçe) * - * Turkish has two different i, one with a dot and another without a dot. They - * are totally different letters in this language, so we have to override the + * The Turkish language, like other Turkic languages, distinguishes + * a dotted letter 'i' from a dotless letter 'ı' (U+0131 LATIN SMALL LETTER DOTLESS I). 
+ * In these languages, each has an equivalent uppercase mapping: + * ı (U+0131 LATIN SMALL LETTER DOTLESS I) -> I (U+0049 LATIN CAPITAL LETTER I), + * i (U+0069 LATIN SMALL LETTER I) -> İ (U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE). + * + * Unicode CaseFolding.txt defines this case as type 'T', a special case for Turkic languages: + * tr and az. PHP 7.3 parser ignores these special cases, so we have to override the * ucfirst and lcfirst methods. + * * See https://en.wikipedia.org/wiki/Dotted_and_dotless_I and T30040 * @ingroup Language */ class LanguageTr extends Language { + private $uc = [ 'I', 'İ' ]; + private $lc = [ 'ı', 'i' ]; + /** * @param string $string * @return string */ public function ucfirst( $string ) { - if ( strlen( $string ) && $string[0] == 'i' ) { - return 'İ' . substr( $string, 1 ); + $first = mb_substr( $string, 0, 1 ); + if ( in_array( $first, $this->lc ) ) { + $first = str_replace( $this->lc, $this->uc, $first ); + return $first . mb_substr( $string, 1 ); } return parent::ucfirst( $string ); } @@ -48,8 +60,10 @@ class LanguageTr extends Language { * @return mixed|string */ function lcfirst( $string ) { - if ( strlen( $string ) && $string[0] == 'I' ) { - return 'ı' . substr( $string, 1 ); + $first = mb_substr( $string, 0, 1 ); + if ( in_array( $first, $this->uc ) ) { + $first = str_replace( $this->uc, $this->lc, $first ); + return $first . mb_substr( $string, 1 ); } return parent::lcfirst( $string ); }