From e9d523b9bd4531755276759e9c8bb2103dc1198e Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 3 Sep 2015 14:46:48 +1000 Subject: [PATCH] Add Html5Depurate tidy driver Also document input format for MWTidy::tidy(). Change-Id: I77071d3db0524695c2baf9a4670ca2455438c83d --- autoload.php | 1 + includes/parser/MWTidy.php | 6 ++++- includes/tidy/Html5Depurate.php | 45 +++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 includes/tidy/Html5Depurate.php diff --git a/autoload.php b/autoload.php index 62d6f09cf8..4db3ec7ec7 100644 --- a/autoload.php +++ b/autoload.php @@ -760,6 +760,7 @@ $wgAutoloadLocalClasses = array( 'MediaWiki\\Logger\\Monolog\\WikiProcessor' => __DIR__ . '/includes/debug/logger/monolog/WikiProcessor.php', 'MediaWiki\\Logger\\NullSpi' => __DIR__ . '/includes/debug/logger/NullSpi.php', 'MediaWiki\\Logger\\Spi' => __DIR__ . '/includes/debug/logger/Spi.php', + 'MediaWiki\\Tidy\\Html5Depurate' => __DIR__ . '/includes/tidy/Html5Depurate.php', 'MediaWiki\\Tidy\\RaggettBase' => __DIR__ . '/includes/tidy/RaggettBase.php', 'MediaWiki\\Tidy\\RaggettExternal' => __DIR__ . '/includes/tidy/RaggettExternal.php', 'MediaWiki\\Tidy\\RaggettInternalHHVM' => __DIR__ . '/includes/tidy/RaggettInternalHHVM.php', diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php index d0e50bc707..807842b61c 100644 --- a/includes/parser/MWTidy.php +++ b/includes/parser/MWTidy.php @@ -38,7 +38,8 @@ class MWTidy { * If tidy isn't able to correct the markup, the original will be * returned in all its glory with a warning comment appended. * - * @param string $text Hideous HTML input + * @param string $text HTML input fragment. This should not contain a + * <body> or <html> tag. 
* @return string Corrected HTML output */ public static function tidy( $text ) { @@ -110,6 +111,9 @@ class MWTidy { case 'RaggettExternal': self::$instance = new MediaWiki\Tidy\RaggettExternal( $config ); break; + case 'Html5Depurate': + self::$instance = new MediaWiki\Tidy\Html5Depurate( $config ); + break; default: throw new MWException( "Invalid tidy driver: \"{$config['driver']}\"" ); } diff --git a/includes/tidy/Html5Depurate.php b/includes/tidy/Html5Depurate.php new file mode 100644 index 0000000000..23e445fa6b --- /dev/null +++ b/includes/tidy/Html5Depurate.php @@ -0,0 +1,45 @@ + 'http://localhost:4339/document', + 'timeout' => 10, + 'connectTimeout' => 0.5, + ) ); + } + + public function tidy( $text ) { + $wrappedtext = '' . + '' . $text . ''; + + $req = MWHttpRequest::factory( $this->config['url'], + array( + 'method' => 'POST', + 'timeout' => $this->config['timeout'], + 'connectTimeout' => $this->config['connectTimeout'], + 'postData' => array( + 'text' => $wrappedtext + ) + ) ); + $status = $req->execute(); + if ( !$status->isOK() ) { + throw new Exception( "Error contacting depurate service: " . $status->getWikiText() ); + } elseif ( $req->getStatus() !== 200 ) { + throw new Exception( "Depurate returned error: " . $status->getWikiText() ); + } + $result = $req->getContent(); + $startBody = strpos( $result, "" ); + $endBody = strrpos( $result, "" ); + if ( $startBody !== false && $endBody !== false && $endBody > $startBody ) { + $startBody += strlen( "" ); + return substr( $result, $startBody, $endBody - $startBody ); + } else { + return $text . "\n"; + } + } +} -- 2.20.1