From: C. Scott Ananian Date: Sat, 12 May 2018 18:37:09 +0000 (-0400) Subject: Minor fixes to CRH language conversion. X-Git-Tag: 1.34.0-rc.0~5352^2 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=685eba436014225a3e81446f552569953d1b5efd Minor fixes to CRH language conversion. * Move a many-to-one mapping from the L2C to the C2L table where it belongs. * Fix some regular expression patterns which ended up with misnumbered replacement strings. * All regular expressions should have the `u` (unicode) flag set. * Typo/spelling fixes in comments Change-Id: If933fc67845ac994d9ddfdf8349aff445ec9b13a --- diff --git a/languages/classes/LanguageCrh.php b/languages/classes/LanguageCrh.php index 1698b9fbb8..e652a52751 100644 --- a/languages/classes/LanguageCrh.php +++ b/languages/classes/LanguageCrh.php @@ -189,7 +189,7 @@ class CrhConverter extends LanguageConverter { /** * It translates text into variant, specials: - * - ommiting roman numbers + * - omitting roman numbers * * @param string $text * @param bool $toVariant diff --git a/languages/data/CrhExceptions.php b/languages/data/CrhExceptions.php index e3bb1561f9..669c802ed5 100644 --- a/languages/data/CrhExceptions.php +++ b/languages/data/CrhExceptions.php @@ -124,6 +124,7 @@ class CrhExceptions { 'beyude' => 'бейуде', 'beyüde' => 'бейуде', 'curat' => 'джурьат', 'cürat' => 'джурьат', 'mesul' => 'месуль', 'mesül' => 'месуль', + 'yetsin' => 'етсин', 'etsin' => 'етсин', ]; # map Cyrillic to Latin and back, simple string match only (no regex) @@ -211,7 +212,6 @@ class CrhExceptions { 'оригинал' => 'original', 'оригиналь' => 'original', 'пускю' => 'püskü', 'пуськю' => 'püskü', 'къарагоз' => 'qaragöz', 'къарагозь' => 'qaragöz', - 'етсин' => 'yetsin', 'етсин' => 'etsin', #### Latin to Cyrillic (deduped from above) @@ -511,7 +511,7 @@ class CrhExceptions { '/\b(['.Crh::C_M_CONS.'])У(['.Crh::C_CONS.'])(['.Crh::C_CONS.'])([еиэюьüЕИЭЮЬÜ])/u' => '$1Ü$2$3$4', '/\bУ(['.Crh::C_CONS.'])(['.Crh::C_CONS.'])([еиэюьüЕИЭЮЬÜ])/u' => 'Ü$1$2$3', - '/\bУю(['.Crh::C_CONS.'])(['.Crh::C_CONS.'])([еиэюьü])/u' => 'Üyü$1$2$2', + '/\bУю(['.Crh::C_CONS.'])(['.Crh::C_CONS.'])([еиэюьü])/u' => 'Üyü$1$2$3', '/\bУЮ(['.Crh::C_CONS.'])(['.Crh::C_CONS.'])([еиэюьü])/u' => 'ÜYÜ$1$2$3', '/\b(['.Crh::C_M_CONS.'])у(['.Crh::C_CONS.'])([еиэюьü])/u' => '$1ü$2$3', @@ -567,12 +567,12 @@ class CrhExceptions { # остальные вхождения о, у, ё, ю # other occurences of о, у, ё, ю - '/Ё(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'YO$2', - '/Ю(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'YU$2', + '/Ё(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'YO$1', + '/Ю(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'YU$1', # Ц & Щ - '/Ц(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'TS$2', - '/Щ(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'ŞÇ$2', + '/Ц(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'TS$1', + '/Щ(['.Crh::C_UC.'CĞÑQÖÜ])/u' => 'ŞÇ$1', ]; $this->Latn2CyrlRegexes = [ @@ -685,7 +685,7 @@ class CrhExceptions { '/KÖZ([^EÜ])/u' => 'КОЗЬ$1', # Punctuation - '/#|No\./' => '№', + '/#|No\./u' => '№', # некоторые случаи употребления Ц '/tsi([^zñ])/u' => 'ци$1', diff --git a/tests/phpunit/languages/classes/LanguageSrTest.php b/tests/phpunit/languages/classes/LanguageSrTest.php index e81d537060..30b4df891d 100644 --- a/tests/phpunit/languages/classes/LanguageSrTest.php +++ b/tests/phpunit/languages/classes/LanguageSrTest.php @@ -68,7 +68,7 @@ class LanguageSrTest extends LanguageClassesTestCase { * @covers LanguageConverter::convertTo */ public function testConversionToCyrillic() { - // A simple convertion of Latin to Cyrillic + // A simple conversion of Latin to Cyrillic $this->assertEquals( 'абвг', $this->convertToCyrillic( 'abvg' ) ); @@ -76,7 +76,7 @@ class LanguageSrTest extends LanguageClassesTestCase { $this->assertEquals( 'ljабnjвгdž', $this->convertToCyrillic( '-{lj}-ab-{nj}-vg-{dž}-' ) ); - // A simple convertion of Cyrillic to Cyrillic + // A simple conversion of Cyrillic to Cyrillic $this->assertEquals( 'абвг', $this->convertToCyrillic( 'абвг' ) ); @@ -110,11 +110,11 @@ class LanguageSrTest extends LanguageClassesTestCase { * @covers LanguageConverter::convertTo */ public function testConversionToLatin() { - // A simple convertion of Latin to Latin + // A simple conversion of Latin to Latin $this->assertEquals( 'abcd', $this->convertToLatin( 'abcd' ) ); - // A simple convertion of Cyrillic to Latin + // A simple conversion of Cyrillic to Latin $this->assertEquals( 'abcd', $this->convertToLatin( 'абцд' ) );