Accept BCP 47 codes in LanguageConverter rules
author: C. Scott Ananian <cscott@cscott.net>
Fri, 13 Jul 2018 19:40:20 +0000 (15:40 -0400)
committer: C. Scott Ananian <cscott@cscott.net>
Wed, 17 Oct 2018 03:58:11 +0000 (23:58 -0400)
Facilitate a gradual migration away from non-standard MediaWiki language
codes.  This will ensure that (a) rules can be written with standard
BCP 47 codes, and (b) rules written with existing non-standard codes will
continue to work once those codes are added to
LanguageCode::$deprecatedLanguageCodeMapping.

Change-Id: I3ba96faafaf40bd47fb5919621f7035f0431a698

languages/ConverterRule.php
languages/LanguageConverter.php
tests/parser/parserTests.txt

index dc61519..6ce1274 100644 (file)
@@ -153,25 +153,27 @@ class ConverterRule {
                        $to = trim( $v[1] );
                        $v = trim( $v[0] );
                        $u = explode( '=>', $v, 2 );
+                       $vv = $this->mConverter->validateVariant( $v );
                        // if $to is empty (which is also used as $from in bidtable),
                        // strtr() could return a wrong result.
-                       if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
-                               $bidtable[$v] = $to;
+                       if ( count( $u ) == 1 && $to !== '' && $vv ) {
+                               $bidtable[$vv] = $to;
                        } elseif ( count( $u ) == 2 ) {
                                $from = trim( $u[0] );
                                $v = trim( $u[1] );
+                               $vv = $this->mConverter->validateVariant( $v );
                                // if $from is empty, strtr() could return a wrong result.
-                               if ( array_key_exists( $v, $unidtable )
-                                       && !is_array( $unidtable[$v] )
+                               if ( array_key_exists( $vv, $unidtable )
+                                       && !is_array( $unidtable[$vv] )
                                        && $from !== ''
-                                       && in_array( $v, $variants ) ) {
-                                       $unidtable[$v] = [ $from => $to ];
-                               } elseif ( $from !== '' && in_array( $v, $variants ) ) {
-                                       $unidtable[$v][$from] = $to;
+                                       && $vv ) {
+                                       $unidtable[$vv] = [ $from => $to ];
+                               } elseif ( $from !== '' && $vv ) {
+                                       $unidtable[$vv][$from] = $to;
                                }
                        }
                        // syntax error, pass
-                       if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
+                       if ( !isset( $this->mConverter->mVariantNames[$vv] ) ) {
                                $bidtable = [];
                                $unidtable = [];
                                break;
index ea26c64..137fe87 100644 (file)
@@ -1175,8 +1175,21 @@ class LanguageConverter {
                        //    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
                        //    [2] => ''
                        //  ]
-                       $pat = '/;\s*(?=';
+                       $expandedVariants = [];
                        foreach ( $this->mVariants as $variant ) {
+                               $expandedVariants[ $variant ] = 1;
+                               // Accept standard BCP 47 names for variants as well.
+                               $expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
+                       }
+                       // Accept old deprecated names for variants
+                       foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
+                               if ( isset( $expandedVariants[ $new ] ) ) {
+                                       $expandedVariants[ $old ] = 1;
+                               }
+                       }
+
+                       $pat = '/;\s*(?=';
+                       foreach ( $expandedVariants as $variant => $ignore ) {
                                // zh-hans:xxx;zh-hant:yyy
                                $pat .= $variant . '\s*:|';
                                // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
index bbd9ecb..50c6a89 100644 (file)
@@ -22434,6 +22434,19 @@ language=zh variant=zh-tw
 <p><span typeof="mw:LanguageVariant" data-parsoid='{"tSp":[6]}' data-mw-variant='{"twoway":[{"l":"zh","t":"China"},{"l":"zh-tw","t":"Taiwan"}]}'></span>, not China</p>
 !! end
 
+!! test
+Explicit definition of language variant alternatives (BCP 47 codes)
+!! options
+language=zh variant=zh-tw
+!! wikitext
+-{zh:China;zh-Hant-TW:Taiwan}-, not China
+!! html/php
+<p>Taiwan, not China
+</p>
+!! html/parsoid
+<p><span typeof="mw:LanguageVariant" data-parsoid='{"tSp":[6]}' data-mw-variant='{"twoway":[{"l":"zh","t":"China"},{"l":"zh-Hant-TW","t":"Taiwan"}]}'></span>, not China</p>
+!! end
+
 !! test
 Filter syntax for language variants
 !! options