From 27b424066453d59eeceda48a43d51e4915da960d Mon Sep 17 00:00:00 2001 From: Santhosh Thottingal Date: Tue, 23 Jul 2019 12:20:52 +0530 Subject: [PATCH] Update LanguageTrTest::testDottedAndDotlessI for PHP 7.3 PHP 7.3+ uses Unicode CaseFolding.txt for case mappings. For Turkic languages (tr, az) the dotted i is given as a special case and we need to implement it specifically for tr and az. Updated the documentation and refactored the lcfirst and ucfirst methods to use arrays containing the above-mentioned special cases. Bug: T207100 Change-Id: I317f2ca66b0adeaa79bc0f9e3dea5edfcd5e4693 --- languages/classes/LanguageTr.php | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/languages/classes/LanguageTr.php b/languages/classes/LanguageTr.php index 49ee88ac8d..03790fa530 100644 --- a/languages/classes/LanguageTr.php +++ b/languages/classes/LanguageTr.php @@ -24,21 +24,33 @@ /** * Turkish (Türkçe) * - * Turkish has two different i, one with a dot and another without a dot. They - * are totally different letters in this language, so we have to override the + * The Turkish language, like other Turkic languages, distinguishes + * a dotted letter 'i' from a dotless letter 'ı' (U+0131 LATIN SMALL LETTER DOTLESS I). + * In these languages, each has an equivalent uppercase mapping: + * ı (U+0131 LATIN SMALL LETTER DOTLESS I) -> I (U+0049 LATIN CAPITAL LETTER I), + * i (U+0069 LATIN SMALL LETTER I) -> İ (U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE). + * + * Unicode CaseFolding.txt defines this case as type 'T', a special case for Turkic languages: + * tr and az. PHP 7.3 parser ignores these special cases, so we have to override the * ucfirst and lcfirst methods. 
+ * * See https://en.wikipedia.org/wiki/Dotted_and_dotless_I and T30040 * @ingroup Language */ class LanguageTr extends Language { + private $uc = [ 'I', 'İ' ]; + private $lc = [ 'ı', 'i' ]; + /** * @param string $string * @return string */ public function ucfirst( $string ) { - if ( strlen( $string ) && $string[0] == 'i' ) { - return 'İ' . substr( $string, 1 ); + $first = mb_substr( $string, 0, 1 ); + if ( in_array( $first, $this->lc, true ) ) { + $first = str_replace( $this->lc, $this->uc, $first ); + return $first . mb_substr( $string, 1 ); } return parent::ucfirst( $string ); } @@ -48,8 +60,10 @@ class LanguageTr extends Language { * @return mixed|string */ function lcfirst( $string ) { - if ( strlen( $string ) && $string[0] == 'I' ) { - return 'ı' . substr( $string, 1 ); + $first = mb_substr( $string, 0, 1 ); + if ( in_array( $first, $this->uc, true ) ) { + $first = str_replace( $this->uc, $this->lc, $first ); + return $first . mb_substr( $string, 1 ); } return parent::lcfirst( $string ); } -- 2.20.1