Language::truncate(): don't chop up multibyte characters when input contains newlines

author Roan Kattouw <roan.kattouw@gmail.com>

Tue, 27 Oct 2015 03:17:37 +0000 (20:17 -0700)

committer Roan Kattouw <roan.kattouw@gmail.com>

Tue, 27 Oct 2015 03:17:37 +0000 (20:17 -0700)
author Roan Kattouw <roan.kattouw@gmail.com>
Tue, 27 Oct 2015 03:17:37 +0000 (20:17 -0700)
committer Roan Kattouw <roan.kattouw@gmail.com>
Tue, 27 Oct 2015 03:17:37 +0000 (20:17 -0700)
diff --git a/languages/Language.php b/languages/Language.php

index 50ed513..3ea2693 100644 (file)
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -3691,8 +3691,9 @@ class Language {
                                 # We got the first byte only of a multibyte char; remove it.
                                 $string = substr( $string, 0, -1 );
                         } elseif ( $char >= 0x80 &&
+                               // Use the /s modifier (PCRE_DOTALL) so (.*) also matches newlines
                                 preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
-                                       '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m )
+                                       '[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
                         ) {
                                 # We chopped in the middle of a character; remove it
                                 $string = $m[1];
diff --git a/tests/phpunit/languages/LanguageTest.php b/tests/phpunit/languages/LanguageTest.php

index 4fca002..77c3c02 100644 (file)
--- a/tests/phpunit/languages/LanguageTest.php
+++ b/tests/phpunit/languages/LanguageTest.php
@@ -261,6 +261,16 @@ class LanguageTest extends LanguageClassesTestCase {
                         $this->getLang()->truncate( "1234567890", 5, 'XXX', false ),
                         'truncate without adjustment'
                 );
+               $this->assertEquals(
+                       "泰乐菌...",
+                       $this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ),
+                       'truncate does not chop Unicode characters in half'
+               );
+               $this->assertEquals(
+                       "\n泰乐菌...",
+                       $this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ),
+                       'truncate does not chop Unicode characters in half if there is a preceding newline'
+               );
         }
  
         /**
author	Roan Kattouw <roan.kattouw@gmail.com>
	Tue, 27 Oct 2015 03:17:37 +0000 (20:17 -0700)
committer	Roan Kattouw <roan.kattouw@gmail.com>
	Tue, 27 Oct 2015 03:17:37 +0000 (20:17 -0700)
languages/Language.php		patch | blob | history
tests/phpunit/languages/LanguageTest.php		patch | blob | history