Merge "profiler: Centralise output responsibility from ProfilerOutputText to Profiler"

[lhc/web/wiklou.git] / languages / LanguageConverter.php
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php

index c5ff9d6..61a967d 100644 (file)
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php
@@ -21,6 +21,7 @@
  use MediaWiki\MediaWikiServices;
  
  use MediaWiki\Logger\LoggerFactory;
+use MediaWiki\Storage\RevisionRecord;
  
  /**
   * Base class for language conversion.
@@ -391,27 +392,30 @@ class LanguageConverter {
                    IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
                    Minimize use of backtracking where possible.
                 */
-               $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
-
-               // this one is needed when the text is inside an HTML markup
-               $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
-
-               // Optimize for the common case where these tags have
-               // few or no children. Thus try and possesively get as much as
-               // possible, and only engage in backtracking when we hit a '<'.
-
-               // disable convert to variants between <code> tags
-               $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
-               // disable conversion of <script> tags
-               $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
-               // disable conversion of <pre> tags
-               $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
-               // The "|.*+)" at the end, is in case we missed some part of html syntax,
-               // we will fail securely (hopefully) by matching the rest of the string.
-               $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
-
-               $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
-                       '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+               static $reg;
+               if ( $reg === null ) {
+                       $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
+
+                       // this one is needed when the text is inside an HTML markup
+                       $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
+
+                       // Optimize for the common case where these tags have
+                       // few or no children. Thus try and possessively get as much as
+                       // possible, and only engage in backtracking when we hit a '<'.
+
+                       // disable convert to variants between <code> tags
+                       $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
+                       // disable conversion of <script> tags
+                       $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
+                       // disable conversion of <pre> tags
+                       $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+                       // The "|.*+)" at the end is in case we missed some part of HTML syntax;
+                       // we will fail securely (hopefully) by matching the rest of the string.
+                       $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
+
+                       $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+                                '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+               }
                 $startPos = 0;
                 $sourceBlob = '';
                 $literalBlob = '';
@@ -426,8 +430,9 @@ class LanguageConverter {
  
                 // We add a marker (\004) at the end of text, to ensure we always match the
                 // entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
+               $textWithMarker = $text . "\004";
                 while ( $startPos < strlen( $text ) ) {
-                       if ( preg_match( $reg, $text . "\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
+                       if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
                                 $elementPos = $markupMatches[0][1];
                                 $element = $markupMatches[0][0];
                                 if ( $element === "\004" ) {
@@ -1039,7 +1044,7 @@ class LanguageConverter {
                                 $revision = Revision::newFromTitle( $title );
                                 if ( $revision ) {
                                         if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
-                                               $txt = $revision->getContent( Revision::RAW )->getText();
+                                               $txt = $revision->getContent( RevisionRecord::RAW )->getText();
                                         }
  
                                         // @todo in the future, use a specialized content model, perhaps based on json!