Rewrite Language::hebrewNumeral()
authorAmir E. Aharoni <amir.aharoni@mail.huji.ac.il>
Thu, 21 May 2015 23:26:05 +0000 (02:26 +0300)
committerNiklas Laxström <niklas.laxstrom@gmail.com>
Tue, 2 Jun 2015 13:10:52 +0000 (15:10 +0200)
Use arrays instead of strings, to avoid using
string functions with Unicode.

Handle thousands according to how years like 1000, 2000, etc.
are named in the Hebrew Wikipedia.

Bug: T97444
Change-Id: I5334e86793d28dfcf8939a249b03a5ea85fa4e69

languages/Language.php
tests/phpunit/languages/LanguageTest.php

index ce779f7..d19dc25 100644 (file)
@@ -2034,7 +2034,18 @@ class Language {
                static $table = array(
                        array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
                        array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
-                       array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
+                       array( '',
+                               array( 'ק' ),
+                               array( 'ר' ),
+                               array( 'ש' ),
+                               array( 'ת' ),
+                               array( 'ת', 'ק' ),
+                               array( 'ת', 'ר' ),
+                               array( 'ת', 'ש' ),
+                               array( 'ת', 'ת' ),
+                               array( 'ת', 'ת', 'ק' ),
+                               array( 'ת', 'ת', 'ר' ),
+                       ),
                        array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
                );
 
@@ -2043,16 +2054,29 @@ class Language {
                        return $num;
                }
 
-               $s = '';
+               // Round thousands have special notations
+               if ( $num === 1000 ) {
+                       return "א' אלף";
+               } elseif ( $num % 1000 === 0 ) {
+                       return $table[0][$num / 1000] . "' אלפים";
+               }
+
+               $letters = array();
+
                for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
                        if ( $num >= $pow10 ) {
                                if ( $num === 15 || $num === 16 ) {
-                                       $s .= $table[0][9] . $table[0][$num - 9];
+                                       $letters[] = $table[0][9];
+                                       $letters[] = $table[0][$num - 9];
                                        $num = 0;
                                } else {
-                                       $s .= $table[$i][intval( ( $num / $pow10 ) )];
+                                       $letters = array_merge(
+                                               $letters,
+                                               (array)$table[$i][intval( $num / $pow10 )]
+                                       );
+
                                        if ( $pow10 === 1000 ) {
-                                               $s .= "'";
+                                               $letters[] = "'";
                                        }
                                }
                        }
@@ -2060,35 +2084,29 @@ class Language {
                        $num = $num % $pow10;
                }
 
-               if ( strlen( $s ) === 2 ) {
-                       $str = $s . "'";
+               $preTransformLength = count( $letters );
+               if ( $preTransformLength === 1 ) {
+                       // Add geresh (single quote) to one-letter numbers
+                       $letters[] = "'";
                } else {
-                       $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
-                       $str .= substr( $s, strlen( $s ) - 2, 2 );
-               }
-
-               $start = substr( $str, 0, strlen( $str ) - 2 );
-               $end = substr( $str, strlen( $str ) - 2 );
+                       $lastIndex = $preTransformLength - 1;
+                       $letters[$lastIndex] = str_replace(
+                               array( 'כ', 'מ', 'נ', 'פ', 'צ' ),
+                               array( 'ך', 'ם', 'ן', 'ף', 'ץ' ),
+                               $letters[$lastIndex]
+                       );
 
-               switch ( $end ) {
-                       case 'כ':
-                               $str = $start . 'ך';
-                               break;
-                       case 'מ':
-                               $str = $start . 'ם';
-                               break;
-                       case 'נ':
-                               $str = $start . 'ן';
-                               break;
-                       case 'פ':
-                               $str = $start . 'ף';
-                               break;
-                       case 'צ':
-                               $str = $start . 'ץ';
-                               break;
+                       // Add gershayim (double quote) to multiple-letter numbers,
+                       // but exclude numbers with only one letter after the thousands
+                       // (1001-1009, 1020, 1030, 2001-2009, etc.)
+                       if ( $letters[1] === "'" && $preTransformLength === 3 ) {
+                               $letters[] = "'";
+                       } else {
+                               array_splice( $letters, -1, 0, '"' );
+                       }
                }
 
-               return $str;
+               return implode( $letters );
        }
 
        /**
index a867834..faa30c3 100644 (file)
@@ -1461,9 +1461,18 @@ class LanguageTest extends LanguageClassesTestCase {
                        array( 400, "ת'" ),
                        array( 500, 'ת"ק' ),
                        array( 800, 'ת"ת' ),
-                       #array( 1000, "א'" ),
-                       #array( 1001, "א'א" ),
+                       array( 1000, "א' אלף" ),
+                       array( 1001, "א'א'" ),
+                       array( 1012, "א'י\"ב" ),
+                       array( 1020, "א'ך'" ),
+                       array( 1030, "א'ל'" ),
+                       array( 1081, "א'פ\"א" ),
+                       array( 2000, "ב' אלפים" ),
+                       array( 2016, "ב'ט\"ז" ),
+                       array( 3000, "ג' אלפים" ),
+                       array( 4000, "ד' אלפים" ),
                        array( 4904, "ד'תתק\"ד" ),
+                       array( 5000, "ה' אלפים" ),
                        array( 5680, "ה'תר\"ף" ),
                        array( 5690, "ה'תר\"ץ" ),
                        array( 5708, "ה'תש\"ח" ),