<?php
-/** Kazakh (Қазақша)
- * converter routines
- *
- * @addtogroup Language
- */
require_once( dirname(__FILE__).'/../LanguageConverter.php' );
require_once( dirname(__FILE__).'/LanguageKk_cyrl.php' );
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
-define( 'KK_A', 'اٵبۆگعدەجزيكقلمنڭوٶپرستۋۇٷفحھچشىٸ' ); # Kazakh Arabic
+//define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
+define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
+//define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER
+/** Kazakh (Қазақша)
+ * converter routines
+ *
+ * @ingroup Language
+ */
class KkConverter extends LanguageConverter {
-
+
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
$flags = array()) {
parent::__construct( $langobj, $maincode,
$variants, $variantfallbacks, $markup, $flags );
-
+
// No point delaying this since they're in code.
// Waiting until loadDefaultTables() means they never get loaded
// when the tables themselves are loaded from cache.
$kk2Cyrl = array();
$kk2Latn = array();
$kk2Arab = array();
- $kk2KZ = array();
- $kk2TR = array();
- $kk2CN = array();
+ $kk2KZ = array();
+ $kk2TR = array();
+ $kk2CN = array();
$this->mTables = array(
- 'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
- 'kk-latn' => new ReplacementArray( $kk2Latn ),
- 'kk-arab' => new ReplacementArray( $kk2Arab ),
- 'kk-kz' => new ReplacementArray( array_merge($kk2Cyrl, $kk2KZ) ),
- 'kk-tr' => new ReplacementArray( array_merge($kk2Latn, $kk2TR) ),
- 'kk-cn' => new ReplacementArray( array_merge($kk2Arab, $kk2CN) ),
- 'kk' => new ReplacementArray()
+ 'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
+ 'kk-latn' => new ReplacementArray( $kk2Latn ),
+ 'kk-arab' => new ReplacementArray( $kk2Arab ),
+ 'kk-kz' => new ReplacementArray( array_merge($kk2Cyrl, $kk2KZ) ),
+ 'kk-tr' => new ReplacementArray( array_merge($kk2Latn, $kk2TR) ),
+ 'kk-cn' => new ReplacementArray( array_merge($kk2Arab, $kk2CN) ),
+ 'kk' => new ReplacementArray()
);
}
$this->mTables['kk-cn']->merge( $this->mTables['kk-arab'] );
}
- function loadRegs() {
+ function loadRegs() {
$this->mCyrl2Latn = array(
+ ## Punctuation
'/№/u' => 'No.',
## Е after vowels
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
'/^Ю(['.KK_C_UC.']|$)/u' => 'YU$1', '/^Ю(['.KK_C_LC.']|$)/u' => 'Yu$1',
'/^Я(['.KK_C_UC.']|$)/u' => 'YA$1', '/^Я(['.KK_C_LC.']|$)/u' => 'Ya$1',
'/^Щ(['.KK_C_UC.']|$)/u' => 'ŞÇ$1', '/^Щ(['.KK_C_LC.']|$)/u' => 'Şç$1',
- ## other ЁЮЯ
- '/Ё/u' => 'YO', '/ё/u' => 'yo',
+ ## other ЁЮЯ
+ '/Ё/u' => 'YO', '/ё/u' => 'yo',
'/Ю/u' => 'YU', '/ю/u' => 'yu',
'/Я/u' => 'YA', '/я/u' => 'ya',
'/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
);
$this->mLatn2Cyrl = array(
- '/No\./u' => '№',
+ ## Punctuation
+ '/#|No\./' => '№',
## Şç
'/ŞÇʹ/u'=> 'ЩЬ', '/Şçʹ/u'=> 'Щь', '/Şçʹ/u'=> 'Щь',
'/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
);
$this->mCyLa2Arab = array(
+ ## Punctuation -> Arabic
+ '/#|№|No\./u' => '', # ؀
+ '/\,/' => '،', # ،
+ '/;/' => '؛', # ؛
+ '/\?/' => '؟', # ؟
+ '/%/' => '٪', # ٪
+ '/\*/' => '٭', # ٭
+ ## Digits -> Arabic
+ '/0/' => '۰', # ۰
+ '/1/' => '۱', # ۱
+ '/2/' => '۲', # ۲
+ '/3/' => '۳', # ۳
+ '/4/' => '۴', # ۴
+ '/5/' => '۵', # ۵
+ '/6/' => '۶', # ۶
+ '/7/' => '۷', # ۷
+ '/8/' => '۸', # ۸
+ '/9/' => '۹', # ۹
## Cyrillic -> Arabic
- '/\№/u' => 'نٶ.',
+ '/Аллаһ/ui' => 'ﷲ',
'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
- '/а/ui' => 'ا', '/ә/ui' => 'ٵ', '/б/ui' => 'ب', '/в/ui' => 'ۆ',
- '/г/ui' => 'گ', '/ғ/ui' => 'ع', '/д/ui' => 'د', '/[еэ]/ui' => 'ە',
- '/ё/ui' => 'يو', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/и/ui' => 'ي',
- '/й/ui' => 'ي', '/к/ui' => 'ك', '/қ/ui' => 'ق', '/л/ui' => 'ل',
- '/м/ui' => 'م', '/н/ui' => 'ن', '/ң/ui' => 'ڭ', '/о/ui' => 'و',
- '/ө/ui' => 'ٶ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
- '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ұ/ui' => 'ۇ', '/ү/ui' => 'ٷ',
- '/ф/ui' => 'ف', '/х/ui' => 'ح', '/һ/ui' => 'ھ', '/ц/ui' => 'تس',
- '/ч/ui' => 'چ', '/ш/ui' => 'ش', '/щ/ui' => 'شش', '/[ъь]/ui' => '',
- '/ы/ui' => 'ى', '/і/ui' => 'ٸ', '/ю/ui' => 'يۋ', '/я/ui' => 'يا',
-
+ '/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
+ '/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
+ '/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
+ '/ц/ui' => 'تس', '/щ/ui' => 'شش',
+ '/һ/ui' => 'ح', '/ч/ui' => 'تش',
+ #'/һ/ui' => 'ھ', '/ч/ui' => 'چ',
+ '/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
+ '/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
+ '/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
+ '/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
+ '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
+ '/ш/ui' => 'ش',
## Latin -> Arabic // commented for now...
- /*'/No\./u' => 'نٶ.',
- '/a/ui' => 'ا', '/ä/ui' => 'ٵ', '/b/ui' => 'ب', '/c/ui' => 'تس',
- '/ç/ui' => 'چ', '/d/ui' => 'د', '/[eé]/ui' => 'ە', '/f/ui' => 'ف',
- '/g/ui' => 'گ', '/ğ/ui' => 'ع', '/h/ui' => 'ھ', '/[ıI]/u' => 'ى',
- '/[iİ]/u' => 'ٸ', '/ï/ui' => 'ي', '/j/ui' => 'ج', '/k/ui' => 'ك',
- '/l/ui' => 'ل', '/m/ui' => 'م', '/n/ui' => 'ن', '/ñ/ui' => 'ڭ',
- '/o/ui' => 'و', '/ö/ui' => 'ٶ', '/p/ui' => 'پ', '/q/ui' => 'ق',
+ /*'/Allah/ui' => 'ﷲ',
+ '/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
+ '/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
+ '/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
+ '/c/ui' => 'تس',
+ '/ç/ui' => 'تش', '/h/ui' => 'ح',
+ #'/ç/ui' => 'چ', '/h/ui' => 'ھ',
+ '/b/ui' => 'ب','/d/ui' => 'د',
+ '/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
+ '/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
+ '/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
'/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
- '/u/ui' => 'ۇ', '/ü/ui' => 'ٷ', '/v/ui' => 'ۆ', '/w/ui' => 'ۋ',
- '/x/ui' => 'ح', '/[yý]/ui' => 'ي', '/z/ui' => 'ز', '/[ʺʹ]/ui' => '',*/
-
- ## Punctuation -> Arabic
- '/\?/' => '؟', # ؟
- '/\,/' => '،', # ،
- '/\;/' => '؛' , # ؛
- '/\%/' => '٪', # ٪
- ## Digits -> Arabic
- '/0/' => '۰', # ۰
- '/1/' => '۱', # ۱
- '/2/' => '۲', # ۲
- '/3/' => '۳', # ۳
- '/4/' => '۴', # ۴
- '/5/' => '۵', # ۵
- '/6/' => '۶', # ۶
- '/7/' => '۷', # ۷
- '/8/' => '۸', # ۸
- '/9/' => '۹', # ۹
+ '/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
);
-
}
/* rules should be defined as -{ekavian | iyekavian-} -or-
foreach($this->mVariants as $v) {
$carray[$v] = $rule;
}
-
+
return $carray;
}
function parserConvert( $text, &$parser ){
if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage())
$this->mDoContentConvert=false;
- else
+ else
$this->mDoContentConvert=true;
return parent::parserConvert($text, $parser );
/*
* A function wrapper:
- * - if there is no selected variant, leave the link
+ * - if there is no selected variant, leave the link
* names as they were
* - do not try to find variants for usernames
*/
- function findVariantLink( &$link, &$nt ) {
+ function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
// check for user namespace
if(is_object($nt)){
$ns = $nt->getNamespace();
}
$oldlink=$link;
- parent::findVariantLink($link,$nt);
- if($this->getPreferredVariant()==$this->mMainLanguageCode)
+ parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+ if( $this->getPreferredVariant()==$this->mMainLanguageCode )
$link=$oldlink;
}
*/
function autoConvert($text, $toVariant=false) {
global $wgTitle;
- if(is_object($wgTitle) && $wgTitle->getNameSpace()==NS_IMAGE){
+ if(is_object($wgTitle) && $wgTitle->getNameSpace()==NS_FILE){
$imagename = $wgTitle->getNsText();
if(preg_match("/^$imagename:/",$text)) return $text;
}
switch( $toVariant ) {
case 'kk-cyrl':
case 'kk-kz':
- $letters = KK_L_UC . KK_L_LC . 'ʺʹ0123456789';
+ $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
$wgContLanguageCode = 'kk';
break;
case 'kk-latn':
break;
case 'kk-arab':
case 'kk-cn':
- // $letters = KK_C_UC.KK_C_LC.KK_L_UC.KK_L_LC.'ʺʹ%№0123456789?,;';
- $letters = KK_C_UC . KK_C_LC . '%№0123456789?,;';
+ $letters = KK_C_UC.KK_C_LC./*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789';
$wgContLanguageCode = 'kk-Arab';
break;
default:
return $ret;
}
- function regsConverter( $text, $toVariant ) {
+ function regsConverter( $text, $toVariant ) {
if ($text == '') return $text;
$pat = array();
switch( $toVariant ) {
case 'kk-arab':
case 'kk-cn':
+ $letters = KK_C_LC.KK_C_UC/*.KK_L_LC.KK_L_UC*/;
+ $front = 'әөүіӘӨҮІ'/*.'äöüiÄÖÜİ'*/;
+ $excludes = 'еэгғкқЕЭГҒКҚ'/*.'eégğkqEÉGĞKQ'*/;
+ // split text to words
+ $matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
+ $mstart = 0;
+ $ret = '';
+ foreach( $matches as $m ) {
+ $ret .= substr( $text, $mstart, $m[1] - $mstart );
+ // Does the word contain a front vowel?
+ // Skip words that also contain е, э, г, ғ, к or қ:
+ // those are written without the hamza.
+ if ( preg_match('/['.$front.']/u', $m[0]) && !preg_match('/['.$excludes.']/u', $m[0]) ) {
+ $ret .= preg_replace('/['.$letters.']+/u', H_HAMZA.'$0', $m[0]);
+ } else {
+ $ret .= $m[0];
+ }
+ $mstart = $m[1] + strlen($m[0]);
+ }
+ $text =& $ret;
foreach( $this->mCyLa2Arab as $pat => $rep ) {
$text = preg_replace( $pat, $rep, $text );
}
return $text;
break;
case 'kk-cyrl':
- case 'kk-cn':
+ case 'kk-kz':
foreach( $this->mLatn2Cyrl as $pat => $rep ) {
$text = preg_replace( $pat, $rep, $text );
}
}
-/* class that handles Cyrillic, Latin and Arabic scripts for Kazakh
- right now it only distinguish kk_cyrl, kk_latn, kk_arab and kk_kz, kk_tr, kk_cn.
-*/
+/**
+ * Class that handles the Cyrillic, Latin and Arabic scripts for Kazakh.
+ * Right now it only distinguishes kk_cyrl, kk_latn, kk_arab and kk_kz, kk_tr, kk_cn.
+ *
+ * @ingroup Language
+ */
class LanguageKk extends LanguageKk_cyrl {
function __construct() {
$variants = array( 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' );
$variantfallbacks = array(
- 'kk' => 'kk-kz',
+ 'kk' => 'kk-cyrl',
'kk-cyrl' => 'kk',
'kk-latn' => 'kk',
'kk-arab' => 'kk',
*
* @return bool
*/
- function isRTL() {
+ function isRTL() {
$variant = $this->getPreferredVariant();
if ( $variant == 'kk-arab' || $variant == 'kk-cn' ) {
return true;
/*
* It fixes issue with ucfirst for transforming 'i' to 'İ'
- *
+ *
*/
function ucfirst ( $string ) {
$variant = $this->getPreferredVariant();
/*
* It fixes issue with lcfirst for transforming 'I' to 'ı'
- *
+ *
*/
function lcfirst ( $string ) {
$variant = $this->getPreferredVariant();
}
function convertGrammar( $word, $case ) {
- $fname="LanguageKk::convertGrammar";
- wfProfileIn( $fname );
+ wfProfileIn( __METHOD__ );
$variant = $this->getPreferredVariant();
switch ( $variant ) {
case 'kk-arab':
case 'kk-cn':
- $word = parent::convertGrammar( $word, $case, $variant = 'kk-arab' );
+ $word = parent::convertGrammarKk_arab( $word, $case );
break;
case 'kk-latn':
case 'kk-tr':
- $word = parent::convertGrammar( $word, $case, $variant = 'kk-latn' );
+ $word = parent::convertGrammarKk_latn( $word, $case );
break;
case 'kk-cyrl':
case 'kk-kz':
case 'kk':
default:
- $word = parent::convertGrammar( $word, $case, $variant = 'kk-cyrl' );
+ $word = parent::convertGrammarKk_cyrl( $word, $case );
}
- wfProfileOut( $fname );
+ wfProfileOut( __METHOD__ );
return $word;
}
-
}