Merge "Truncate tag filter descriptions"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 21 Feb 2018 12:52:22 +0000 (12:52 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Wed, 21 Feb 2018 12:52:23 +0000 (12:52 +0000)
includes/changetags/ChangeTags.php
includes/specialpage/ChangesListSpecialPage.php
languages/Language.php
tests/phpunit/languages/LanguageTest.php

index 7e4dd00..b30b82d 100644 (file)
@@ -181,6 +181,28 @@ class ChangeTags {
                return $msg;
        }
 
+       /**
+        * Get truncated message for the tag's long description.
+        *
+        * @param string $tag Tag name.
+        * @param int $length Maximum length of truncated message, including ellipsis.
+        * @param IContextSource $context
+        *
+        * @return string Truncated long tag description.
+        */
+       public static function truncateTagDescription( $tag, $length, IContextSource $context ) {
+               $originalDesc = self::tagLongDescriptionMessage( $tag, $context );
+               // If there is no tag description, return empty string
+               if ( !$originalDesc ) {
+                       return '';
+               }
+
+               $taglessDesc = Sanitizer::stripAllTags( $originalDesc->parse() );
+               $escapedDesc = Sanitizer::escapeHtmlAllowEntities( $taglessDesc );
+
+               return $context->getLanguage()->truncateForVisual( $escapedDesc, $length );
+       }
+
        /**
         * Add tags to a change given its rc_id, rev_id and/or log_id
         *
index cf990c2..5aa1c6b 100644 (file)
@@ -33,6 +33,12 @@ use Wikimedia\Rdbms\IDatabase;
  * @ingroup SpecialPage
  */
 abstract class ChangesListSpecialPage extends SpecialPage {
+       /**
+        * Maximum length of a tag description in UTF-8 characters.
+        * Longer descriptions will be truncated.
+        */
+       const TAG_DESC_CHARACTER_LIMIT = 120;
+
        /**
         * Preference name for saved queries. Subclasses that use saved queries should override this.
         * @var string
@@ -794,15 +800,15 @@ abstract class ChangesListSpecialPage extends SpecialPage {
                                                isset( $explicitlyDefinedTags[ $tagName ] ) ||
                                                isset( $softwareActivatedTags[ $tagName ] )
                                        ) {
-                                               // Parse description
-                                               $desc = ChangeTags::tagLongDescriptionMessage( $tagName, $context );
-
                                                $result[] = [
                                                        'name' => $tagName,
                                                        'label' => Sanitizer::stripAllTags(
                                                                ChangeTags::tagDescription( $tagName, $context )
                                                        ),
-                                                       'description' => $desc ? Sanitizer::stripAllTags( $desc->parse() ) : '',
+                                                       'description' =>
+                                                               ChangeTags::truncateTagDescription(
+                                                                       $tagName, self::TAG_DESC_CHARACTER_LIMIT, $context
+                                                               ),
                                                        'cssClass' => Sanitizer::escapeClass( 'mw-tag-' . $tagName ),
                                                        'hits' => $hits,
                                                ];
index 084a2e7..fc8ef87 100644 (file)
@@ -3472,27 +3472,103 @@ class Language {
        }
 
        /**
-        * Truncate a string to a specified length in bytes, appending an optional
-        * string (e.g. for ellipses)
+        * This method is deprecated since 1.31 and kept as alias for truncateForDatabase, which
+        * has replaced it. This method provides truncation suitable for DB.
         *
         * The database offers limited byte lengths for some columns in the database;
         * multi-byte character sets mean we need to ensure that only whole characters
-        * are included, otherwise broken characters can be passed to the user
+        * are included, otherwise broken characters can be passed to the user.
         *
-        * If $length is negative, the string will be truncated from the beginning
+        * @deprecated since 1.31, use truncateForDatabase or truncateForVisual as appropriate.
         *
         * @param string $string String to truncate
-        * @param int $length Maximum length (including ellipses)
+        * @param int $length Maximum length (including ellipsis)
         * @param string $ellipsis String to append to the truncated text
         * @param bool $adjustLength Subtract length of ellipsis from $length.
         *      $adjustLength was introduced in 1.18, before that behaved as if false.
         * @return string
         */
        function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
+               return $this->truncateForDatabase( $string, $length, $ellipsis, $adjustLength );
+       }
+
+       /**
+        * Truncate a string to a specified length in bytes, appending an optional
+        * string (e.g. for ellipsis)
+        *
+        * If $length is negative, the string will be truncated from the beginning
+        *
+        * @since 1.31
+        *
+        * @param string $string String to truncate
+        * @param int $length Maximum length in bytes
+        * @param string $ellipsis String to append to the end of truncated text
+        * @param bool $adjustLength Subtract length of ellipsis from $length
+        *
+        * @return string
+        */
+       function truncateForDatabase( $string, $length, $ellipsis = '...', $adjustLength = true ) {
+               return $this->truncateInternal(
+                       $string, $length, $ellipsis, $adjustLength, 'strlen', 'substr'
+               );
+       }
+
+       /**
+        * Truncate a string to a specified number of characters, appending an optional
+        * string (e.g. for ellipsis).
+        *
+        * This provides multibyte version of truncate() method of this class, suitable for truncation
+        * based on number of characters, instead of number of bytes.
+        *
+        * If $length is negative, the string will be truncated from the beginning.
+        *
+        * @since 1.31
+        *
+        * @param string $string String to truncate
+        * @param int $length Maximum number of characters
+        * @param string $ellipsis String to append to the end of truncated text
+        * @param bool $adjustLength Subtract length of ellipsis from $length
+        *
+        * @return string
+        */
+       function truncateForVisual( $string, $length, $ellipsis = '...', $adjustLength = true ) {
+               // Passing encoding to mb_strlen and mb_substr is optional.
+               // Encoding defaults to mb_internal_encoding(), which is set to UTF-8 in Setup.php, so
+               // explicit specification of encoding is skipped.
+               // Note: Both multibyte methods are callables invoked in truncateInternal.
+               return $this->truncateInternal(
+                       $string, $length, $ellipsis, $adjustLength, 'mb_strlen', 'mb_substr'
+               );
+       }
+
+       /**
+        * Internal method used for truncation. This method abstracts text truncation into
+        * one common method, allowing users to provide length measurement function and
+        * function for finding substring.
+        *
+        * For usages, see truncateForDatabase and truncateForVisual.
+        *
+        * @param string $string String to truncate
+        * @param int $length Maximum length of final text
+        * @param string $ellipsis String to append to the end of truncated text
+        * @param bool $adjustLength Subtract length of ellipsis from $length
+        * @param callable $measureLength Callable function used for determining the length of text
+        * @param callable $getSubstring Callable function used for getting the substrings
+        *
+        * @return string
+        */
+       private function truncateInternal(
+               $string, $length, $ellipsis = '...', $adjustLength = true, $measureLength, $getSubstring
+       ) {
+               if ( !is_callable( $measureLength ) || !is_callable( $getSubstring ) ) {
+                       throw new InvalidArgumentException( 'Invalid callback provided' );
+               }
+
                # Check if there is no need to truncate
-               if ( strlen( $string ) <= abs( $length ) ) {
+               if ( $measureLength( $string ) <= abs( $length ) ) {
                        return $string; // no need to truncate
                }
+
                # Use the localized ellipsis character
                if ( $ellipsis == '...' ) {
                        $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
@@ -3500,31 +3576,33 @@ class Language {
                if ( $length == 0 ) {
                        return $ellipsis; // convention
                }
+
                $stringOriginal = $string;
                # If ellipsis length is >= $length then we can't apply $adjustLength
-               if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) {
+               if ( $adjustLength && $measureLength( $ellipsis ) >= abs( $length ) ) {
                        $string = $ellipsis; // this can be slightly unexpected
                # Otherwise, truncate and add ellipsis...
                } else {
-                       $eLength = $adjustLength ? strlen( $ellipsis ) : 0;
+                       $ellipsisLength = $adjustLength ? $measureLength( $ellipsis ) : 0;
                        if ( $length > 0 ) {
-                               $length -= $eLength;
-                               $string = substr( $string, 0, $length ); // xyz...
+                               $length -= $ellipsisLength;
+                               $string = $getSubstring( $string, 0, $length ); // xyz...
                                $string = $this->removeBadCharLast( $string );
                                $string = rtrim( $string );
                                $string = $string . $ellipsis;
                        } else {
-                               $length += $eLength;
-                               $string = substr( $string, $length ); // ...xyz
+                               $length += $ellipsisLength;
+                               $string = $getSubstring( $string, $length ); // ...xyz
                                $string = $this->removeBadCharFirst( $string );
                                $string = ltrim( $string );
                                $string = $ellipsis . $string;
                        }
                }
+
                # Do not truncate if the ellipsis makes the string longer/equal (T24181).
                # This check is *not* redundant if $adjustLength, due to the single case where
                # LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string.
-               if ( strlen( $string ) < strlen( $stringOriginal ) ) {
+               if ( $measureLength( $string ) < $measureLength( $stringOriginal ) ) {
                        return $string;
                } else {
                        return $stringOriginal;
index 5cb5602..050ed83 100644 (file)
@@ -209,70 +209,104 @@ class LanguageTest extends LanguageClassesTestCase {
        }
 
        /**
-        * @covers Language::truncate
+        * @covers Language::truncateForDatabase
+        * @covers Language::truncateInternal
         */
-       public function testTruncate() {
+       public function testTruncateForDatabase() {
                $this->assertEquals(
                        "XXX",
-                       $this->getLang()->truncate( "1234567890", 0, 'XXX' ),
+                       $this->getLang()->truncateForDatabase( "1234567890", 0, 'XXX' ),
                        'truncate prefix, len 0, small ellipsis'
                );
 
                $this->assertEquals(
                        "12345XXX",
-                       $this->getLang()->truncate( "1234567890", 8, 'XXX' ),
+                       $this->getLang()->truncateForDatabase( "1234567890", 8, 'XXX' ),
                        'truncate prefix, small ellipsis'
                );
 
                $this->assertEquals(
                        "123456789",
-                       $this->getLang()->truncate( "123456789", 5, 'XXXXXXXXXXXXXXX' ),
+                       $this->getLang()->truncateForDatabase( "123456789", 5, 'XXXXXXXXXXXXXXX' ),
                        'truncate prefix, large ellipsis'
                );
 
                $this->assertEquals(
                        "XXX67890",
-                       $this->getLang()->truncate( "1234567890", -8, 'XXX' ),
+                       $this->getLang()->truncateForDatabase( "1234567890", -8, 'XXX' ),
                        'truncate suffix, small ellipsis'
                );
 
                $this->assertEquals(
                        "123456789",
-                       $this->getLang()->truncate( "123456789", -5, 'XXXXXXXXXXXXXXX' ),
+                       $this->getLang()->truncateForDatabase( "123456789", -5, 'XXXXXXXXXXXXXXX' ),
                        'truncate suffix, large ellipsis'
                );
                $this->assertEquals(
                        "123XXX",
-                       $this->getLang()->truncate( "123                ", 9, 'XXX' ),
+                       $this->getLang()->truncateForDatabase( "123                ", 9, 'XXX' ),
                        'truncate prefix, with spaces'
                );
                $this->assertEquals(
                        "12345XXX",
-                       $this->getLang()->truncate( "12345            8", 11, 'XXX' ),
+                       $this->getLang()->truncateForDatabase( "12345            8", 11, 'XXX' ),
                        'truncate prefix, with spaces and non-space ending'
                );
                $this->assertEquals(
                        "XXX234",
-                       $this->getLang()->truncate( "1              234", -8, 'XXX' ),
+                       $this->getLang()->truncateForDatabase( "1              234", -8, 'XXX' ),
                        'truncate suffix, with spaces'
                );
                $this->assertEquals(
                        "12345XXX",
-                       $this->getLang()->truncate( "1234567890", 5, 'XXX', false ),
+                       $this->getLang()->truncateForDatabase( "1234567890", 5, 'XXX', false ),
                        'truncate without adjustment'
                );
                $this->assertEquals(
                        "泰乐菌...",
-                       $this->getLang()->truncate( "泰乐菌素123456789", 11, '...', false ),
+                       $this->getLang()->truncateForDatabase( "泰乐菌素123456789", 11, '...', false ),
                        'truncate does not chop Unicode characters in half'
                );
                $this->assertEquals(
                        "\n泰乐菌...",
-                       $this->getLang()->truncate( "\n泰乐菌素123456789", 12, '...', false ),
+                       $this->getLang()->truncateForDatabase( "\n泰乐菌素123456789", 12, '...', false ),
                        'truncate does not chop Unicode characters in half if there is a preceding newline'
                );
        }
 
+       /**
+        * @dataProvider provideTruncateData
+        * @covers Language::truncateForVisual
+        * @covers Language::truncateInternal
+        */
+       public function testTruncateForVisual(
+               $expected, $string, $length, $ellipsis = '...', $adjustLength = true
+       ) {
+               $this->assertEquals(
+                       $expected,
+                       $this->getLang()->truncateForVisual( $string, $length, $ellipsis, $adjustLength )
+               );
+       }
+
+       /**
+        * @return array Format is ($expected, $string, $length, $ellipsis, $adjustLength)
+        */
+       public static function provideTruncateData() {
+               return [
+                       [ "XXX", "тестирам да ли ради", 0, "XXX" ],
+                       [ "testnXXX", "testni scenarij", 8, "XXX" ],
+                       [ "حالة اختبار", "حالة اختبار", 5, "XXXXXXXXXXXXXXX" ],
+                       [ "XXXедент", "прецедент", -8, "XXX" ],
+                       [ "XXപിൾ", "ആപ്പിൾ", -5, "XX" ],
+                       [ "神秘XXX", "神秘                ", 9, "XXX" ],
+                       [ "ΔημιουργXXX", "Δημιουργία           Σύμπαντος", 11, "XXX" ],
+                       [ "XXXの家です", "地球は私たちの唯               の家です", -8, "XXX" ],
+                       [ "زندگیXXX", "زندگی زیباست", 6, "XXX", false ],
+                       [ "ცხოვრება...", "ცხოვრება არის საოცარი", 8, "...", false ],
+                       [ "\nທ່ານ...", "\nທ່ານບໍ່ຮູ້ຫນັງສື", 5, "...", false ],
+               ];
+       }
+
        /**
         * @dataProvider provideHTMLTruncateData
         * @covers Language::truncateHTML