Add new grammar forms for language names in Russian
author Amir E. Aharoni <amir.aharoni@mail.huji.ac.il>
Tue, 11 Nov 2014 07:04:48 +0000 (09:04 +0200)
committer Amir E. Aharoni <amir.aharoni@mail.huji.ac.il>
Mon, 28 Sep 2015 12:51:24 +0000 (15:51 +0300)
CLDR provides translated language names. They are useful for showing
names by themselves in menus and lists, but it's often problematic to add them
to Russian sentences, because they need to be declined, so a message like
"This page is not available in the $1 language" is hard to localize.

This patch adds new cases for Russian -
"languagegen", "languageprep" and "languageadverb".
(The last one, as its name says, is not actually a grammatical case,
but a transformation into an adverbial expression.)
This covers most of the needs for language names that MediaWiki supports.

Change-Id: Ib6a0afa5c3736f8b9b2e121cd752c53ee50fad75

languages/classes/LanguageRu.php
resources/src/mediawiki.language/languages/ru.js
tests/phpunit/languages/classes/LanguageRuTest.php
tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js

index 226e313..f50640a 100644 (file)
@@ -24,7 +24,9 @@
 /**
  * Russian (русский язык)
  *
- * You can contact Alexander Sigachov (alexander.sigachov at Googgle Mail)
+ * You can contact:
+ * Alexander Sigachov (alexander.sigachov at Googgle Mail)
+ * Amir E. Aharoni (amir.aharoni@mail.huji.ac.il)
  *
  * @ingroup Language
  */
@@ -44,9 +46,8 @@ class LanguageRu extends Language {
                        return $wgGrammarForms['ru'][$case][$word];
                }
 
-               # These rules are not perfect, but they are currently only used for Wikimedia
-               # site names so it doesn't matter if they are wrong sometimes.
-               # Just add a special case for your site name if necessary.
+               # These rules don't cover the whole language, and are intended only for
+               # names of languages and Wikimedia sites.
 
                # substr doesn't support Unicode and mb_substr has issues,
                # so break it to characters using preg_match_all and then use array_slice and join
@@ -100,6 +101,72 @@ class LanguageRu extends Language {
                                        } elseif ( join( '', array_slice( $chars[0], -3 ) ) === 'ные' ) {
                                                $word = join( '', array_slice( $chars[0], 0, -3 ) ) . 'ных';
                                        }
+                                       break;
+                               case 'languagegen': # язык в родительном падеже ("(с) русского")
+                                       $suffix = join( '', array_slice( $chars[0], -3 ) );
+                                       if ( $suffix === 'кий' ) {
+                                               $word = join(
+                                                       '',
+                                                       array_slice( $chars[0], 0, count( $chars[0] ) - 2 )
+                                               ) . 'ого';
+
+                                               break;
+                                       }
+
+                                       if ( in_array( $word, array( 'иврит', 'идиш' ) ) ) {
+                                               $word = $word . 'а';
+
+                                               break;
+                                       }
+
+                                       break;
+                               case 'languageprep': # язык в предложном падеже ("(на) русском")
+                                       $suffix = join( '', array_slice( $chars[0], -3 ) );
+                                       if ( $suffix === 'кий' ) {
+                                               $word = join(
+                                                       '',
+                                                       array_slice( $chars[0], 0, count( $chars[0] ) - 2 )
+                                               ) . 'ом';
+
+                                               break;
+                                       }
+
+                                       if ( in_array( $word, array( 'иврит', 'идиш' ) ) ) {
+                                               $word = $word . 'е';
+
+                                               break;
+                                       }
+
+                                       break;
+                               case 'languageadverb': # наречие с названием языка ("по-русски")
+                                       $suffix = join( '', array_slice( $chars[0], -3 ) );
+                                       if ( $suffix === 'кий' ) {
+                                               $word = 'по-' . join(
+                                                       '',
+                                                       array_slice( $chars[0], 0, count( $chars[0] ) - 1 )
+                                               );
+
+                                               break;
+                                       }
+
+                                       if ( in_array( $word, array( 'иврит', 'идиш' ) ) ) {
+                                               $word = 'на ' . $word . 'е';
+
+                                               break;
+                                       }
+
+                                       // Known particular cases of undeclinable names
+                                       // Известные несклоняемые
+                                       if ( in_array( $word, array( 'идо', 'урду', 'хинди', 'эсперанто' ) ) ) {
+                                               $word = "на $word";
+
+                                               break;
+                                       }
+
+                                       // Undeclinable
+                                       // Остальные несклоняемые
+                                       $word = "на языке $word";
+
                                        break;
                        }
                }
index ee1d6ef..ccc68f1 100644 (file)
@@ -7,6 +7,7 @@
 // cases.
 
 mediaWiki.language.convertGrammar = function ( word, form ) {
+       /*global $ */
        'use strict';
 
        var grammarForms = mediaWiki.language.getData( 'ru', 'grammarForms' );
@@ -52,6 +53,31 @@ mediaWiki.language.convertGrammar = function ( word, form ) {
                                word = word.slice( 0, -3 ) + 'нике';
                        }
                        break;
+               case 'languagegen': // язык в родительном падеже ("(с) русского")
+                       if ( word.slice( -3 ) === 'кий' ) {
+                               word = word.slice( 0, -2 ) + 'ого';
+                       } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) {
+                               word = word + 'а';
+                       }
+                       break;
+               case 'languageprep': // язык в предложном падеже ("(на) русском")
+                       if ( word.slice( -3 ) === 'кий' ) {
+                               word = word.slice( 0, -2 ) + 'ом';
+                       } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) {
+                               word = word + 'е';
+                       }
+                       break;
+               case 'languageadverb': // наречие с названием языка ("по-русски")
+                       if ( word.slice( -3 ) === 'кий' ) {
+                               word = 'по-' + word.slice( 0, -1 );
+                       } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) {
+                               word = 'на ' + word + 'е';
+                       } else if ( $.inArray( word, [ 'идо', 'урду', 'хинди', 'эсперанто' ] ) > -1 ) {
+                               word = 'на ' + word;
+                       } else {
+                               word = 'на языке ' + word;
+                       }
+                       break;
        }
        return word;
 };
index 1381afb..a301479 100644 (file)
@@ -110,6 +110,71 @@ class LanguageRuTest extends LanguageClassesTestCase {
                                'Викиданные',
                                'prepositional',
                        ),
+                       array(
+                               'русского',
+                               'русский',
+                               'languagegen',
+                       ),
+                       array(
+                               'немецкого',
+                               'немецкий',
+                               'languagegen',
+                       ),
+                       array(
+                               'иврита',
+                               'иврит',
+                               'languagegen',
+                       ),
+                       array(
+                               'эсперанто',
+                               'эсперанто',
+                               'languagegen',
+                       ),
+                       array(
+                               'русском',
+                               'русский',
+                               'languageprep',
+                       ),
+                       array(
+                               'немецком',
+                               'немецкий',
+                               'languageprep',
+                       ),
+                       array(
+                               'идише',
+                               'идиш',
+                               'languageprep',
+                       ),
+                       array(
+                               'эсперанто',
+                               'эсперанто',
+                               'languageprep',
+                       ),
+                       array(
+                               'по-русски',
+                               'русский',
+                               'languageadverb',
+                       ),
+                       array(
+                               'по-немецки',
+                               'немецкий',
+                               'languageadverb',
+                       ),
+                       array(
+                               'на иврите',
+                               'иврит',
+                               'languageadverb',
+                       ),
+                       array(
+                               'на эсперанто',
+                               'эсперанто',
+                               'languageadverb',
+                       ),
+                       array(
+                               'на языке гуарани',
+                               'гуарани',
+                               'languageadverb',
+                       ),
                );
        }
 }
index 399290c..4ecd480 100644 (file)
                                grammarForm: 'prepositional',
                                expected: 'данных',
                                description: 'Grammar test for prepositional case, данные -> данных'
+                       },
+                       {
+                               word: 'русский',
+                               grammarForm: 'languagegen',
+                               expected: 'русского',
+                               description: 'Grammar test for languagegen case, русский -> русского'
+                       },
+                       {
+                               word: 'немецкий',
+                               grammarForm: 'languagegen',
+                               expected: 'немецкого',
+                               description: 'Grammar test for languagegen case, немецкий -> немецкого'
+                       },
+                       {
+                               word: 'иврит',
+                               grammarForm: 'languagegen',
+                               expected: 'иврита',
+                               description: 'Grammar test for languagegen case, иврит -> иврита'
+                       },
+                       {
+                               word: 'эсперанто',
+                               grammarForm: 'languagegen',
+                               expected: 'эсперанто',
+                               description: 'Grammar test for languagegen case, эсперанто -> эсперанто'
+                       },
+                       {
+                               word: 'русский',
+                               grammarForm: 'languageprep',
+                               expected: 'русском',
+                               description: 'Grammar test for languageprep case, русский -> русском'
+                       },
+                       {
+                               word: 'немецкий',
+                               grammarForm: 'languageprep',
+                               expected: 'немецком',
+                               description: 'Grammar test for languageprep case, немецкий -> немецком'
+                       },
+                       {
+                               word: 'идиш',
+                               grammarForm: 'languageprep',
+                               expected: 'идише',
+                               description: 'Grammar test for languageprep case, идиш -> идише'
+                       },
+                       {
+                               word: 'эсперанто',
+                               grammarForm: 'languageprep',
+                               expected: 'эсперанто',
+                               description: 'Grammar test for languageprep case, эсперанто -> эсперанто'
+                       },
+                       {
+                               word: 'русский',
+                               grammarForm: 'languageadverb',
+                               expected: 'по-русски',
+                               description: 'Grammar test for languageadverb case, русский -> по-русски'
+                       },
+                       {
+                               word: 'немецкий',
+                               grammarForm: 'languageadverb',
+                               expected: 'по-немецки',
+                               description: 'Grammar test for languageadverb case, немецкий -> по-немецки'
+                       },
+                       {
+                               word: 'иврит',
+                               grammarForm: 'languageadverb',
+                               expected: 'на иврите',
+                               description: 'Grammar test for languageadverb case, иврит -> на иврите'
+                       },
+                       {
+                               word: 'эсперанто',
+                               grammarForm: 'languageadverb',
+                               expected: 'на эсперанто',
+                               description: 'Grammar test for languageadverb case, эсперанто -> на эсперанто'
+                       },
+                       {
+                               word: 'гуарани',
+                               grammarForm: 'languageadverb',
+                               expected: 'на языке гуарани',
+                               description: 'Grammar test for languageadverb case, гуарани -> на языке гуарани'
                        }
                ],