use MediaWiki\MediaWikiServices;
use MediaWiki\Logger\LoggerFactory;
+use MediaWiki\Storage\RevisionRecord;
/**
* Base class for language conversion.
IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
Minimize use of backtracking where possible.
*/
- $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
-
- // this one is needed when the text is inside an HTML markup
- $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
-
- // Optimize for the common case where these tags have
- // few or no children. Thus try and possesively get as much as
- // possible, and only engage in backtracking when we hit a '<'.
-
- // disable convert to variants between <code> tags
- $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
- // disable conversion of <script> tags
- $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
- // disable conversion of <pre> tags
- $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
- // The "|.*+)" at the end, is in case we missed some part of html syntax,
- // we will fail securely (hopefully) by matching the rest of the string.
- $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
-
- $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
- '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+ static $reg;
+ if ( $reg === null ) {
+ $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
+
+ // this one is needed when the text is inside an HTML markup
+ $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
+
+ // Optimize for the common case where these tags have
+ // few or no children. Thus try and possessively get as much as
+ // possible, and only engage in backtracking when we hit a '<'.
+
+ // disable conversion to variants between <code> tags
+ $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
+ // disable conversion of <script> tags
+ $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
+ // disable conversion of <pre> tags
+ $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+ // The "|.*+)" at the end is there in case we missed some part of HTML syntax;
+ // in that case we fail securely (hopefully) by matching the rest of the string.
+ $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
+
+ $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+ '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+ }
$startPos = 0;
$sourceBlob = '';
$literalBlob = '';
// We add a marker (\004) at the end of text, to ensure we always match the
// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
+ $textWithMarker = $text . "\004";
while ( $startPos < strlen( $text ) ) {
- if ( preg_match( $reg, $text . "\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
+ if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
$elementPos = $markupMatches[0][1];
$element = $markupMatches[0][0];
if ( $element === "\004" ) {
$revision = Revision::newFromTitle( $title );
if ( $revision ) {
if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
- $txt = $revision->getContent( Revision::RAW )->getText();
+ $txt = $revision->getContent( RevisionRecord::RAW )->getText();
}
// @todo in the future, use a specialized content model, perhaps based on json!