Merge "Parser: Add guessSectionNameFromStrippedText() and refactor"

[lhc/web/wiklou.git] / includes / parser / Parser.php
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php

index d00b333..2b03a70 100644 (file)
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -2299,10 +2299,7 @@ class Parser {
                                         /**
                                          * Strip the whitespace interwiki links produce, see T10897
                                          */
-                                       $s = rtrim( $s . $prefix );
-                                       // Special case: strip newlines when only thing between
-                                       // this link and next are newlines
-                                       $s .= trim( $trail, "\n" ) === '' ? '' : $trail;
+                                       $s = rtrim( $s . $prefix ) . $trail; # T175416
                                         continue;
                                 }
  
@@ -2330,10 +2327,7 @@ class Parser {
                                         /**
                                          * Strip the whitespace Category links produce, see T2087
                                          */
-                                       $s = rtrim( $s . $prefix ); # T2087, T87753
-                                       // Special case: strip newlines when only thing between
-                                       // this link and next are newlines
-                                       $s .= trim( $trail, "\n" ) === '' ? '' : $trail;
+                                       $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
  
                                         if ( $wasblank ) {
                                                 $sortkey = $this->getDefaultSort();
@@ -2510,10 +2504,10 @@ class Parser {
                                 $value = '|';
                                 break;
                         case 'currentmonth':
-                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
                                 break;
                         case 'currentmonth1':
-                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
                                 break;
                         case 'currentmonthname':
                                 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
@@ -2525,16 +2519,16 @@ class Parser {
                                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
                                 break;
                         case 'currentday':
-                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
                                 break;
                         case 'currentday2':
-                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
                                 break;
                         case 'localmonth':
-                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
                                 break;
                         case 'localmonth1':
-                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
                                 break;
                         case 'localmonthname':
                                 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
@@ -2546,10 +2540,10 @@ class Parser {
                                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
                                 break;
                         case 'localday':
-                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
                                 break;
                         case 'localday2':
-                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
+                               $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
                                 break;
                         case 'pagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getText() );
@@ -3950,7 +3944,7 @@ class Parser {
                         $this->mForceTocPosition = true;
  
                         # Set a placeholder. At the end we'll fill it in with the TOC.
-                       $text = $mw->replace( '<!--MWTOC-->', $text, 1 );
+                       $text = $mw->replace( '<!--MWTOC\'"-->', $text, 1 );
  
                         # Only keep the first one.
                         $text = $mw->replace( '', $text );
@@ -4212,6 +4206,9 @@ class Parser {
  
                         # Decode HTML entities
                         $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
+
+                       $safeHeadline = self::normalizeSectionName( $safeHeadline );
+
                         $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
                         $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
                         $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
@@ -4393,7 +4390,7 @@ class Parser {
                 $full .= implode( '', $sections );
  
                 if ( $this->mForceTocPosition ) {
-                       return str_replace( '<!--MWTOC-->', $toc, $full );
+                       return str_replace( '<!--MWTOC\'"-->', $toc, $full );
                 } else {
                         return $full;
                 }
@@ -5759,21 +5756,42 @@ class Parser {
                 return $this->mDefaultSort;
         }
  
+       /**
+        * Turn already-stripped heading text into a section name.
+        *
+        * The text goes through three steps, in this order: section-name
+        * whitespace normalization, decoding of character references, and
+        * normalizeSectionName().
+        *
+        * @param string $text Heading text with wikitext markup already stripped
+        * @return string
+        */
+       private static function getSectionNameFromStrippedText( $text ) {
+               return self::normalizeSectionName(
+                       Sanitizer::decodeCharReferences(
+                               Sanitizer::normalizeSectionNameWhitespace( $text )
+                       )
+               );
+       }
+
+       /**
+        * Build a link anchor from a section name.
+        *
+        * @param string $sectionName
+        * @return string '#' followed by the output of Sanitizer::escapeIdForLink()
+        */
+       private static function makeAnchor( $sectionName ) {
+               $fragment = Sanitizer::escapeIdForLink( $sectionName );
+               return "#$fragment";
+       }
+
+       /**
+        * Build the legacy (fallback) anchor from a section name.
+        *
+        * When the second entry of $wgFragmentMode is 'legacy', the fallback
+        * attribute encoding is used; otherwise the regular link encoding is used.
+        *
+        * @param string $sectionName
+        * @return string Anchor (starting with '#')
+        */
+       private static function makeLegacyAnchor( $sectionName ) {
+               global $wgFragmentMode;
+               if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
+                       // ForAttribute() and ForLink() are the same for legacy encoding
+                       $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
+               } else {
+                       $id = Sanitizer::escapeIdForLink( $sectionName );
+               }
+
+               return "#$id";
+       }
+
         /**
          * Try to guess the section anchor name based on a wikitext fragment
          * presumably extracted from a heading, for example "Header" from
          * "== Header ==".
          *
          * @param string $text
-        *
-        * @return string
+        * @return string Anchor (starting with '#')
          */
         public function guessSectionNameFromWikiText( $text ) {
                 # Strip out wikitext links(they break the anchor)
                 $text = $this->stripSectionName( $text );
-               $text = Sanitizer::normalizeSectionNameWhitespace( $text );
-               $text = Sanitizer::decodeCharReferences( $text );
-               return '#' . Sanitizer::escapeIdForLink( $text );
+               $sectionName = self::getSectionNameFromStrippedText( $text );
+               return self::makeAnchor( $sectionName );
         }
  
         /**
@@ -5783,24 +5801,41 @@ class Parser {
          * than UTF-8, resulting in breakage.
          *
          * @param string $text The section name
-        * @return string An anchor
+        * @return string Anchor (starting with '#')
          */
         public function guessLegacySectionNameFromWikiText( $text ) {
-               global $wgFragmentMode;
-
                 # Strip out wikitext links(they break the anchor)
                 $text = $this->stripSectionName( $text );
-               $text = Sanitizer::normalizeSectionNameWhitespace( $text );
-               $text = Sanitizer::decodeCharReferences( $text );
+               $sectionName = self::getSectionNameFromStrippedText( $text );
+               return self::makeLegacyAnchor( $sectionName );
+       }
  
-               if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
-                       // ForAttribute() and ForLink() are the same for legacy encoding
-                       $id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK );
-               } else {
-                       $id = Sanitizer::escapeIdForLink( $text );
-               }
+       /**
+        * Counterpart of guessSectionNameFromWikiText() for text that has already
+        * been stripped: the stripSectionName() step is not performed here.
+        *
+        * @param string $text Section name (plain text)
+        * @return string Anchor (starting with '#')
+        */
+       public static function guessSectionNameFromStrippedText( $text ) {
+               return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
+       }
  
-               return "#$id";
+       /**
+        * Normalize a section name the way code linking to the section would.
+        *
+        * The name is prefixed with '#' and handed to the title parser's
+        * splitTitleString(); the resulting 'fragment' part is returned. If the
+        * title parser throws MalformedTitleException, the input is returned
+        * unchanged.
+        *
+        * @param string $text
+        * @return string
+        */
+       private static function normalizeSectionName( $text ) {
+               # T90902: ensure the same normalization is applied for IDs as to links
+               $parser = MediaWikiServices::getInstance()->getTitleParser();
+               try {
+                       $pieces = $parser->splitTitleString( "#$text" );
+               } catch ( MalformedTitleException $ex ) {
+                       // Not parseable as a fragment-only title: keep the name as given
+                       return $text;
+               }
+
+               return $pieces['fragment'];
         }
  
         /**