Custom grammar rules in javascript for some languages
author Santhosh Thottingal <santhosh.thottingal@gmail.com>
Mon, 4 Jun 2012 09:19:35 +0000 (14:49 +0530)
committer Santhosh Thottingal <santhosh.thottingal@gmail.com>
Mon, 4 Jun 2012 09:19:35 +0000 (14:49 +0530)
Split from https://gerrit.wikimedia.org/r/4554
Grammar rules ported to JavaScript, based on the PHP code.
Depends on the JavaScript grammar parsing support provided by jqueryMsg.

Change-Id: I701954a623467bf6c9abd3160e239c59cab567e9

14 files changed:
resources/Resources.php
resources/mediawiki.language/languages/bs.js
resources/mediawiki.language/languages/dsb.js
resources/mediawiki.language/languages/fi.js [new file with mode: 0644]
resources/mediawiki.language/languages/ga.js
resources/mediawiki.language/languages/he.js
resources/mediawiki.language/languages/hsb.js
resources/mediawiki.language/languages/hu.js [new file with mode: 0644]
resources/mediawiki.language/languages/hy.js
resources/mediawiki.language/languages/la.js [new file with mode: 0644]
resources/mediawiki.language/languages/os.js [new file with mode: 0644]
resources/mediawiki.language/languages/ru.js
resources/mediawiki.language/languages/sl.js
resources/mediawiki.language/languages/uk.js

index 34b9be6..1f2dcd0 100644 (file)
@@ -687,6 +687,7 @@ return array(
                        'cu' => 'resources/mediawiki.language/languages/cu.js',
                        'cy' => 'resources/mediawiki.language/languages/cy.js',
                        'dsb' => 'resources/mediawiki.language/languages/dsb.js',
+                       'fi' => 'resources/mediawiki.language/languages/fi.js',
                        'fr' => 'resources/mediawiki.language/languages/fr.js',
                        'ga' => 'resources/mediawiki.language/languages/ga.js',
                        'gd' => 'resources/mediawiki.language/languages/gd.js',
@@ -695,8 +696,10 @@ return array(
                        'hi' => 'resources/mediawiki.language/languages/hi.js',
                        'hr' => 'resources/mediawiki.language/languages/hr.js',
                        'hsb' => 'resources/mediawiki.language/languages/hsb.js',
+                       'hu' => 'resources/mediawiki.language/languages/hu.js',
                        'hy' => 'resources/mediawiki.language/languages/hy.js',
                        'ksh' => 'resources/mediawiki.language/languages/ksh.js',
+                       'la' => 'resources/mediawiki.language/languages/la.js',
                        'ln' => 'resources/mediawiki.language/languages/ln.js',
                        'lt' => 'resources/mediawiki.language/languages/lt.js',
                        'lv' => 'resources/mediawiki.language/languages/lv.js',
@@ -706,6 +709,7 @@ return array(
                        'mt' => 'resources/mediawiki.language/languages/mt.js',
                        'nl' => 'resources/mediawiki.language/languages/nl.js',
                        'nso' => 'resources/mediawiki.language/languages/nso.js',
+                       'os' => 'resources/mediawiki.language/languages/os.js',
                        'pl' => 'resources/mediawiki.language/languages/pl.js',
                        'pt' => 'resources/mediawiki.language/languages/pt.js',
                        'pt-br' => 'resources/mediawiki.language/languages/pt-br.js',
index 9f22426..9affcf0 100644 (file)
@@ -18,3 +18,19 @@ mediaWiki.language.convertPlural = function( count, forms ) {
                        return forms[2];
        }
 };
+
+/**
+ * Bosnian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'bs'
+ * language data takes precedence. Otherwise a preposition is prefixed for
+ * the supported forms, and any other form returns the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('instrumental' or 'lokativ')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'bs', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       switch ( form ) {
+               case 'instrumental': // instrumental
+                       return 's ' + word;
+               case 'lokativ': // locative
+                       return 'o ' + word;
+       }
+       return word;
+};
index c3eaa1f..e3c5808 100644 (file)
@@ -16,3 +16,19 @@ mediaWiki.language.convertPlural = function( count, forms ) {
                        return forms[3];
        }
 };
+
+/**
+ * Lower Sorbian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'dsb'
+ * language data takes precedence. Otherwise a preposition is prefixed for
+ * the supported forms, and any other form returns the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('instrumental' or 'lokatiw')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'dsb', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       switch ( form ) {
+               case 'instrumental': // instrumental
+                       return 'z ' + word;
+               case 'lokatiw': // locative
+                       return 'wo ' + word;
+       }
+       return word;
+};
diff --git a/resources/mediawiki.language/languages/fi.js b/resources/mediawiki.language/languages/fi.js
new file mode 100644 (file)
index 0000000..374698d
--- /dev/null
@@ -0,0 +1,44 @@
+/**
+ *  Finnish (Suomi) language functions
+ *  @author Santhosh Thottingal
+ */
+
+/**
+ * Finnish grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'fi'
+ * language data takes precedence. Otherwise a case ending is appended,
+ * choosing the a/ä variant of the ending from the vowels in the word.
+ * Unknown forms return the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('genitive', 'elative',
+ *  'partitive', 'illative' or 'inessive')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'fi', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+
+       // Kept so that unknown forms can return the word without the appended 'i'
+       var origWord = word;
+       // Vowel harmony flag: truthy when the word contains a, o or u with no
+       // ä, ö or y after it
+       var aou = word.match( /[aou][^äöy]*$/i );
+       // Words ending in 'wiki' always get the ä variants
+       if ( word.match( /wiki$/i ) ) {
+               aou = false;
+       }
+       // Append i after a final consonant
+       if ( word.match( /[bcdfghjklmnpqrstvwxz]$/i ) ) {
+               word += 'i';
+       }
+
+       switch ( form ) {
+               case 'genitive':
+                       word += 'n';
+                       break;
+               case 'elative':
+                       word += ( aou ? 'sta' : 'stä' );
+                       break;
+               case 'partitive':
+                       word += ( aou ? 'a' : 'ä' );
+                       break;
+               case 'illative':
+                       // Double the last letter and add 'n'
+                       word += word.charAt( word.length - 1 ) + 'n';
+                       break;
+               case 'inessive':
+                       word += ( aou ? 'ssa' : 'ssä' );
+                       break;
+               default:
+                       word = origWord;
+                       break;
+       }
+       return word;
+};
index 3928013..51fce35 100644 (file)
@@ -12,3 +12,38 @@ mediaWiki.language.convertPlural = function( count, forms ) {
        }
        return forms[2];
 };
+
+
+/**
+ * Irish grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'ga'
+ * language data takes precedence. Otherwise the 'ainmlae' form maps the
+ * seven listed day names to their replacement; every other word or form
+ * is returned unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('ainmlae')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'ga', 'grammarForms' );
+       // Day name => form returned for 'ainmlae'
+       var dayNames = {
+               'an Domhnach': 'Dé Domhnaigh',
+               'an Luan': 'Dé Luain',
+               'an Mháirt': 'Dé Mháirt',
+               'an Chéadaoin': 'Dé Chéadaoin',
+               'an Déardaoin': 'Déardaoin',
+               'an Aoine': 'Dé hAoine',
+               'an Satharn': 'Dé Sathairn'
+       };
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       // hasOwnProperty keeps inherited keys such as 'constructor' from matching
+       if ( form === 'ainmlae' && dayNames.hasOwnProperty( word ) ) {
+               return dayNames[word];
+       }
+       return word;
+};
index b74440e..e737a7c 100644 (file)
@@ -12,3 +12,29 @@ mediaWiki.language.convertPlural = function( count, forms ) {
        }
        return forms[1];
 };
+
+/**
+ * Hebrew grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'he'
+ * language data takes precedence. Otherwise the 'prefixed' form (also
+ * accepted under its Hebrew name) adjusts the start of the word so that a
+ * prefix can be attached; every other form returns the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('prefixed' or 'תחילית')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'he', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       // 'תחילית' is the same form name in Hebrew
+       if ( form !== 'prefixed' && form !== 'תחילית' ) {
+               return word;
+       }
+
+       // Duplicate prefixed "Waw", but only if it's not already double
+       if ( word.charAt( 0 ) === "ו" && word.substr( 0, 2 ) !== "וו" ) {
+               word = "ו" + word;
+       }
+
+       // Remove the "He" if prefixed
+       if ( word.charAt( 0 ) === "ה" ) {
+               word = word.substr( 1 );
+       }
+
+       // Add a hyphen (maqaf) before numbers and non-Hebrew letters
+       if ( word.charAt( 0 ) < "א" || word.charAt( 0 ) > "ת" ) {
+               word = "־" + word;
+       }
+       return word;
+};
index 8651fe4..6aa993c 100644 (file)
@@ -16,3 +16,20 @@ mediaWiki.language.convertPlural = function( count, forms ) {
                        return forms[3];
        }
 };
+
+
+/**
+ * Upper Sorbian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'hsb'
+ * language data takes precedence. Otherwise a preposition is prefixed for
+ * the supported forms, and any other form returns the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('instrumental' or 'lokatiw')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'hsb', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       switch ( form ) {
+               case 'instrumental': // instrumental
+                       return 'z ' + word;
+               case 'lokatiw': // locative
+                       return 'wo ' + word;
+       }
+       return word;
+};
diff --git a/resources/mediawiki.language/languages/hu.js b/resources/mediawiki.language/languages/hu.js
new file mode 100644 (file)
index 0000000..eb3f1f3
--- /dev/null
@@ -0,0 +1,23 @@
+/**
+ *  Hungarian language functions
+ *  @author Santhosh Thottingal
+ */
+
+/**
+ * Hungarian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'hu'
+ * language data takes precedence. Otherwise a fixed suffix is appended for
+ * the supported forms, and any other form returns the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('rol', 'ba' or 'k')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'hu', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       switch ( form ) {
+               case 'rol':
+                       return word + 'ról';
+               case 'ba':
+                       return word + 'ba';
+               case 'k':
+                       return word + 'k';
+       }
+       return word;
+};
index 734c26d..b51b2b6 100644 (file)
@@ -6,3 +6,27 @@ mediaWiki.language.convertPlural = function( count, forms ) {
        forms = mediaWiki.language.preConvertPlural( forms, 2 );
        return ( Math.abs( count ) <= 1 ) ? forms[0] : forms[1];
 };
+
+/**
+ * Armenian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'hy'
+ * language data takes precedence. Otherwise the 'genitive' form rewrites
+ * or extends the ending of the word; every other form returns the word
+ * unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('genitive')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'hy', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+
+       // These rules are not perfect, but they are currently only used for site names so it doesn't
+       // matter if they are wrong sometimes. Just add a special case for your site name if necessary.
+
+       // slice() is used for the "last n characters" checks because substr()
+       // with a negative start is not handled consistently by older engines.
+       switch ( form ) {
+               case 'genitive': // genitive case
+                       if ( word.slice( -1 ) === 'ա' ) {
+                               word = word.slice( 0, -1 ) + 'այի';
+                       } else if ( word.slice( -1 ) === 'ո' ) {
+                               word = word.slice( 0, -1 ) + 'ոյի';
+                       } else if ( word.slice( -4 ) === 'գիրք' ) {
+                               word = word.slice( 0, -4 ) + 'գրքի';
+                       } else {
+                               word = word + 'ի';
+                       }
+                       break;
+       }
+       return word;
+};
diff --git a/resources/mediawiki.language/languages/la.js b/resources/mediawiki.language/languages/la.js
new file mode 100644 (file)
index 0000000..313bb1c
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Latin (lingua Latina) language functions
+ * @author Santhosh Thottingal
+ */
+
+/**
+ * Latin grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'la'
+ * language data takes precedence. Otherwise the ending of the word is
+ * rewritten for the supported cases; every other form returns the word
+ * unchanged. Only a few declensions are covered, mostly in the singular.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('genitive', 'accusative' or
+ *  'ablative')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'la', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       // Within each case the replacements run one after another on the running
+       // result, so their order matters.
+       switch ( form ) {
+               case 'genitive':
+                       word = word.replace( /u[ms]$/i, 'i' ); // 2nd declension singular
+                       word = word.replace( /ommunia$/i, 'ommunium' ); // 3rd declension neuter plural (partly)
+                       word = word.replace( /a$/i, 'ae' ); // 1st declension singular
+                       word = word.replace( /libri$/i, 'librorum' ); // 2nd declension plural (partly)
+                       word = word.replace( /nuntii$/i, 'nuntiorum' ); // 2nd declension plural (partly)
+                       word = word.replace( /tio$/i, 'tionis' ); // 3rd declension singular (partly)
+                       word = word.replace( /ns$/i, 'ntis' );
+                       word = word.replace( /as$/i, 'atis' );
+                       word = word.replace( /es$/i, 'ei' ); // 5th declension singular
+                       break;
+               case 'accusative':
+                       word = word.replace( /u[ms]$/i, 'um' ); // 2nd declension singular
+                       // 3rd declension neuter plural (partly): '...ommunia' has the same
+                       // form in the accusative, so it is left alone and must not pick up
+                       // the 1st declension ending.
+                       if ( !/ommunia$/i.test( word ) ) {
+                               word = word.replace( /a$/i, 'am' ); // 1st declension singular
+                       }
+                       word = word.replace( /libri$/i, 'libros' ); // 2nd declension plural (partly)
+                       word = word.replace( /nuntii$/i, 'nuntios' ); // 2nd declension plural (partly)
+                       word = word.replace( /tio$/i, 'tionem' ); // 3rd declension singular (partly)
+                       word = word.replace( /ns$/i, 'ntem' );
+                       word = word.replace( /as$/i, 'atem' );
+                       word = word.replace( /es$/i, 'em' ); // 5th declension singular
+                       break;
+               case 'ablative':
+                       word = word.replace( /u[ms]$/i, 'o' ); // 2nd declension singular
+                       word = word.replace( /ommunia$/i, 'ommunibus' ); // 3rd declension neuter plural (partly)
+                       word = word.replace( /a$/i, 'a' ); // 1st declension singular
+                       word = word.replace( /libri$/i, 'libris' ); // 2nd declension plural (partly)
+                       word = word.replace( /nuntii$/i, 'nuntiis' ); // 2nd declension plural (partly)
+                       word = word.replace( /tio$/i, 'tione' ); // 3rd declension singular (partly)
+                       word = word.replace( /ns$/i, 'nte' );
+                       word = word.replace( /as$/i, 'ate' );
+                       word = word.replace( /es$/i, 'e' ); // 5th declension singular
+                       break;
+       }
+       return word;
+};
diff --git a/resources/mediawiki.language/languages/os.js b/resources/mediawiki.language/languages/os.js
new file mode 100644 (file)
index 0000000..431e38c
--- /dev/null
@@ -0,0 +1,69 @@
+/**
+ * Ossetian (Ирон) language functions
+ * @author Santhosh Thottingal
+ */
+
+
+/**
+ * Ossetian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'os'
+ * language data takes precedence. Otherwise a case ending is appended,
+ * optionally preceded by 'й' or '-' depending on how the word ends.
+ * Unknown forms append nothing, but a word ending in 'тæ' still loses
+ * its final letter.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('genitive', 'dative',
+ *  'allative', 'ablative', 'superessive', 'equative' or 'comitative')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'os', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       // Ending for the allative case
+       var end_allative = 'мæ';
+       // 'й' placed between a final vowel and the ending
+       var jot = '';
+       // '-' placed before the ending of non-Ossetic words
+       var hyphen = '';
+       // Case ending to append
+       var ending = '';
+       // Word-final vowels: а, æ, е, ё, и, о, ы, э, ю, я
+       var finalVowel = /[аæеёиоыэюя]$/i;
+
+       if ( word.match( /тæ$/i ) ) {
+               // Plural form: drop the last letter and switch the allative ending
+               word = word.substring( 0, word.length - 1 );
+               end_allative = 'æм';
+       } else if ( word.match( finalVowel ) ) {
+               // Singular form ending in one of the vowels above
+               jot = 'й';
+       } else if ( word.match( /у$/i ) ) {
+               // 'У' can be either consonant 'W' or vowel 'U' in cyrillic Ossetic;
+               // it gets 'й' only when the letter before it is not a vowel.
+               // Examples: {{grammar:genitive|аунеу}} = аунеуы, {{grammar:genitive|лæппу}} = лæппуйы.
+               if ( !word.substring( word.length - 2, word.length - 1 ).match( finalVowel ) ) {
+                       jot = 'й';
+               }
+       } else if ( !word.match( /[бвгджзйклмнопрстфхцчшщьъ]$/i ) ) {
+               hyphen = '-';
+       }
+
+       switch ( form ) {
+               case 'genitive':
+                       ending = hyphen + jot + 'ы';
+                       break;
+               case 'dative':
+                       ending = hyphen + jot + 'æн';
+                       break;
+               case 'allative':
+                       ending = hyphen + end_allative;
+                       break;
+               case 'ablative':
+                       // After 'й' the ending is 'æ' instead of 'æй'
+                       ending = hyphen + jot + ( jot === 'й' ? 'æ' : 'æй' );
+                       break;
+               case 'superessive':
+                       ending = hyphen + jot + 'ыл';
+                       break;
+               case 'equative':
+                       ending = hyphen + jot + 'ау';
+                       break;
+               case 'comitative':
+                       ending = hyphen + 'имæ';
+                       break;
+       }
+       return word + ending;
+};
index 17c9293..42be4f9 100644 (file)
@@ -21,3 +21,29 @@ mediaWiki.language.convertPlural = function( count, forms ) {
                        return forms[2];
        }
 };
+
+/**
+ * Russian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'ru'
+ * language data takes precedence. Otherwise the 'genitive' form replaces
+ * the first matching ending from a fixed list; words ending in 'вики' or
+ * 'Вики', words with no listed ending, and all other forms are returned
+ * unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('genitive')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'ru', 'grammarForms' );
+       // [ ending, replacement ] pairs for the genitive case, tried in order;
+       // the first ending that matches wins
+       var genitiveEndings = [
+               [ 'ь', 'я' ],
+               [ 'ия', 'ии' ],
+               [ 'ка', 'ки' ],
+               [ 'ти', 'тей' ],
+               [ 'ды', 'дов' ],
+               [ 'ник', 'ника' ]
+       ];
+       var i, ending;
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       if ( form !== 'genitive' ) {
+               return word;
+       }
+       // Words ending in 'вики' / 'Вики' are not declined
+       if ( word.slice( -4 ) === 'вики' || word.slice( -4 ) === 'Вики' ) {
+               return word;
+       }
+       for ( i = 0; i < genitiveEndings.length; i++ ) {
+               ending = genitiveEndings[i][0];
+               if ( word.slice( -ending.length ) === ending ) {
+                       return word.slice( 0, -ending.length ) + genitiveEndings[i][1];
+               }
+       }
+       return word;
+};
index a887e0e..cfb5781 100644 (file)
@@ -18,3 +18,19 @@ mediaWiki.language.convertPlural = function( count, forms ) {
        }
        return forms[4];
 };
+
+/**
+ * Slovenian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'sl'
+ * language data takes precedence. Otherwise a preposition is prefixed for
+ * the supported forms, and any other form returns the word unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('mestnik' or 'orodnik')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'sl', 'grammarForms' );
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       switch ( form ) {
+               case 'mestnik': // locative
+                       return 'o ' + word;
+               case 'orodnik': // instrumental
+                       return 'z ' + word;
+       }
+       return word;
+};
index 5c1294e..372d441 100644 (file)
@@ -21,3 +21,35 @@ mediaWiki.language.convertPlural = function( count, forms ) {
                        return forms[2];
        }
 };
+
+/**
+ * Ukrainian grammar conversion.
+ *
+ * A form stored for this exact word under 'grammarForms' in the 'uk'
+ * language data takes precedence. Otherwise the 'genitive' and
+ * 'accusative' forms replace the first matching ending from a fixed list;
+ * words ending in 'вікі' or 'Вікі', words with no listed ending, and all
+ * other forms are returned unchanged.
+ *
+ * @param {string} word Word to convert
+ * @param {string} form Grammar form name ('genitive' or 'accusative')
+ * @return {string} Converted word
+ */
+mediaWiki.language.convertGrammar = function( word, form ) {
+       var grammarForms = mw.language.getData( 'uk', 'grammarForms' );
+       // [ ending, replacement ] pairs, tried in order; the first ending that
+       // matches wins
+       var genitiveEndings = [
+               [ 'ь', 'я' ],
+               [ 'ія', 'ії' ],
+               [ 'ка', 'ки' ],
+               [ 'ти', 'тей' ],
+               [ 'ды', 'дов' ],
+               [ 'ник', 'ника' ]
+       ];
+       var accusativeEndings = [
+               [ 'ія', 'ію' ]
+       ];
+       var endings, i, ending;
+       // Use the override only when this particular word is listed. A form that
+       // is configured for other words must fall through to the rules below
+       // instead of returning undefined.
+       if ( grammarForms && grammarForms[form] && grammarForms[form][word] !== undefined ) {
+               return grammarForms[form][word];
+       }
+       switch ( form ) {
+               case 'genitive': // genitive case
+                       endings = genitiveEndings;
+                       break;
+               case 'accusative': // accusative case
+                       endings = accusativeEndings;
+                       break;
+               default:
+                       return word;
+       }
+       // Words ending in 'вікі' / 'Вікі' are not declined
+       if ( word.slice( -4 ) === 'вікі' || word.slice( -4 ) === 'Вікі' ) {
+               return word;
+       }
+       for ( i = 0; i < endings.length; i++ ) {
+               ending = endings[i][0];
+               if ( word.slice( -ending.length ) === ending ) {
+                       return word.slice( 0, -ending.length ) + endings[i][1];
+               }
+       }
+       return word;
+};